diff --git a/.ci/metrics/metrics.py b/.ci/metrics/metrics.py index 354b5058100e7..bd2b51154768d 100644 --- a/.ci/metrics/metrics.py +++ b/.ci/metrics/metrics.py @@ -3,6 +3,7 @@ import os from dataclasses import dataclass import sys +import logging import github from github import Github @@ -192,6 +193,10 @@ def get_per_workflow_metrics( # in nanoseconds. created_at_ns = int(created_at.timestamp()) * 10**9 + logging.info( + f"Adding a job metric for job {workflow_job.id} in workflow {workflow_run.id}" + ) + workflow_metrics.append( JobMetrics( workflow_run.name + "-" + workflow_job.name, @@ -220,7 +225,7 @@ def upload_metrics(workflow_metrics, metrics_userid, api_key): """ if len(workflow_metrics) == 0: - print("No metrics found to upload.", file=sys.stderr) + logging.info("No metrics found to upload.") return metrics_batch = [] @@ -249,9 +254,7 @@ def upload_metrics(workflow_metrics, metrics_userid, api_key): ) if response.status_code < 200 or response.status_code >= 300: - print( - f"Failed to submit data to Grafana: {response.status_code}", file=sys.stderr - ) + logging.info(f"Failed to submit data to Grafana: {response.status_code}") def main(): @@ -275,7 +278,7 @@ def main(): current_metrics += get_sampled_workflow_metrics(github_repo) upload_metrics(current_metrics, grafana_metrics_userid, grafana_api_key) - print(f"Uploaded {len(current_metrics)} metrics", file=sys.stderr) + logging.info(f"Uploaded {len(current_metrics)} metrics") for workflow_metric in reversed(current_metrics): if isinstance(workflow_metric, JobMetrics): @@ -287,4 +290,5 @@ def main(): if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) main() diff --git a/.github/workflows/build-metrics-container.yml b/.github/workflows/build-metrics-container.yml index 751ab679411dc..af4d599f76417 100644 --- a/.github/workflows/build-metrics-container.yml +++ b/.github/workflows/build-metrics-container.yml @@ -20,14 +20,14 @@ on: jobs: build-metrics-container: if: github.repository_owner 
== 'llvm' - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 outputs: container-name: ${{ steps.vars.outputs.container-name }} container-name-tag: ${{ steps.vars.outputs.container-name-tag }} container-filename: ${{ steps.vars.outputs.container-filename }} steps: - name: Checkout LLVM - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .ci/metrics/ - name: Write Variables @@ -49,7 +49,7 @@ jobs: run: | podman save ${{ steps.vars.outputs.container-name-tag }} > ${{ steps.vars.outputs.container-filename }} - name: Upload Container Image - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 with: name: container path: ${{ steps.vars.outputs.container-filename }} @@ -66,7 +66,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - name: Download Container - uses: actions/download-artifact@v4 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: container - name: Push Container diff --git a/.github/workflows/ci-post-commit-analyzer.yml b/.github/workflows/ci-post-commit-analyzer.yml index d614dd07b3a49..e0d30b66f66b6 100644 --- a/.github/workflows/ci-post-commit-analyzer.yml +++ b/.github/workflows/ci-post-commit-analyzer.yml @@ -44,7 +44,7 @@ jobs: uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - name: Setup ccache - uses: hendrikmuhs/ccache-action@v1 + uses: hendrikmuhs/ccache-action@a1209f81afb8c005c13b4296c32e363431bffea5 # v1.2.17 with: # A full build of llvm, clang, lld, and lldb takes about 250MB # of ccache space. 
There's not much reason to have more than this, diff --git a/.github/workflows/commit-access-review.yml b/.github/workflows/commit-access-review.yml index f9195a1863dee..5ab0ddd50d241 100644 --- a/.github/workflows/commit-access-review.yml +++ b/.github/workflows/commit-access-review.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Fetch LLVM sources - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Install dependencies run: | diff --git a/.github/workflows/containers/github-action-ci/Dockerfile b/.github/workflows/containers/github-action-ci/Dockerfile index 377b8f14402ee..bd3720017b7f7 100644 --- a/.github/workflows/containers/github-action-ci/Dockerfile +++ b/.github/workflows/containers/github-action-ci/Dockerfile @@ -40,7 +40,7 @@ RUN cmake -B ./build -G Ninja ./llvm \ RUN ninja -C ./build stage2-clang-bolt stage2-install-distribution && ninja -C ./build install-distribution FROM base as ci-container - + COPY --from=stage1-toolchain $LLVM_SYSROOT $LLVM_SYSROOT # Need to install curl for hendrikmuhs/ccache-action @@ -49,7 +49,7 @@ COPY --from=stage1-toolchain $LLVM_SYSROOT $LLVM_SYSROOT # Need git for SPIRV-Tools tests. RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y \ - binutils \ + binutils \ cmake \ curl \ git \ @@ -59,7 +59,6 @@ RUN apt-get update && \ perl-modules \ python3-psutil \ sudo \ - # These are needed by the premerge pipeline. Pip is used to install # dependent python packages and ccache is used for build caching. File and # tzdata are used for tests. 
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index b4fa27203236a..b5f76d761af22 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -51,7 +51,7 @@ on: jobs: check-docs-build: name: "Test documentation build" - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 if: github.repository == 'llvm/llvm-project' steps: # Don't fetch before checking for file changes to force the file changes @@ -60,12 +60,12 @@ jobs: # a local checkout beforehand. - name: Fetch LLVM sources (Push) if: ${{ github.event_name == 'push' }} - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 1 - name: Get subprojects that have doc changes id: docs-changed-subprojects - uses: tj-actions/changed-files@v39 + uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8 # v45.0.7 with: files_yaml: | llvm: @@ -98,11 +98,11 @@ jobs: - '.github/workflows/docs.yml' - name: Fetch LLVM sources (PR) if: ${{ github.event_name == 'pull_request' }} - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 1 - name: Setup Python env - uses: actions/setup-python@v5 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: '3.11' cache: 'pip' @@ -216,7 +216,7 @@ jobs: mkdir built-docs/flang cp -r flang-build/docs/* built-docs/flang/ - name: Upload docs - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 with: name: docs-output path: built-docs/ diff --git a/.github/workflows/email-check.yaml b/.github/workflows/email-check.yaml index 8f32d020975f5..f4481d5cf5583 100644 --- a/.github/workflows/email-check.yaml +++ b/.github/workflows/email-check.yaml @@ -10,11 +10,11 @@ permissions: jobs: validate_email: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 if: github.repository == 'llvm/llvm-project' steps: - name: Fetch LLVM 
sources - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: ref: ${{ github.event.pull_request.head.sha }} diff --git a/.github/workflows/issue-release-workflow.yml b/.github/workflows/issue-release-workflow.yml index 5027d4f3ea6f1..efd045990d013 100644 --- a/.github/workflows/issue-release-workflow.yml +++ b/.github/workflows/issue-release-workflow.yml @@ -32,7 +32,7 @@ env: jobs: backport-commits: name: Backport Commits - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 permissions: issues: write pull-requests: write @@ -42,7 +42,7 @@ jobs: contains(github.event.action == 'opened' && github.event.issue.body || github.event.comment.body, '/cherry-pick') steps: - name: Fetch LLVM sources - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: llvm/llvm-project # GitHub stores the token used for checkout and uses it for pushes diff --git a/.github/workflows/issue-subscriber.yml b/.github/workflows/issue-subscriber.yml index ef4fdf4418193..de1c45c944960 100644 --- a/.github/workflows/issue-subscriber.yml +++ b/.github/workflows/issue-subscriber.yml @@ -10,11 +10,11 @@ permissions: jobs: auto-subscribe: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 if: github.repository == 'llvm/llvm-project' steps: - name: Checkout Automation Script - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: llvm/utils/git/ ref: main diff --git a/.github/workflows/issue-write.yml b/.github/workflows/issue-write.yml index 5334157a7fd20..ad17d9a33ddb7 100644 --- a/.github/workflows/issue-write.yml +++ b/.github/workflows/issue-write.yml @@ -14,7 +14,7 @@ permissions: jobs: pr-comment: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 permissions: pull-requests: write if: > @@ -25,7 +25,7 @@ jobs: ) steps: - name: Fetch Sources - uses: actions/checkout@v4 + uses: 
actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/workflows/unprivileged-download-artifact/action.yml @@ -39,7 +39,7 @@ jobs: - name: 'Comment on PR' if: steps.download-artifact.outputs.artifact-id != '' - uses: actions/github-script@v3 + uses: actions/github-script@ffc2c79a5b2490bd33e0a41c1de74b877714d736 # v3.2.0 with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | diff --git a/.github/workflows/libc-fullbuild-tests.yml b/.github/workflows/libc-fullbuild-tests.yml index d93ac84116240..00d2696e7e586 100644 --- a/.github/workflows/libc-fullbuild-tests.yml +++ b/.github/workflows/libc-fullbuild-tests.yml @@ -30,7 +30,7 @@ jobs: # - c_compiler: gcc # cpp_compiler: g++ steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 # Libc's build is relatively small comparing with other components of LLVM. # A fresh fullbuild takes about 190MiB of uncompressed disk space, which can @@ -39,7 +39,7 @@ jobs: # Do not use direct GHAC access even though it is supported by sccache. GHAC rejects # frequent small object writes. - name: Setup ccache - uses: hendrikmuhs/ccache-action@v1.2 + uses: hendrikmuhs/ccache-action@a1209f81afb8c005c13b4296c32e363431bffea5 # v1.2.17 with: max-size: 1G key: libc_fullbuild_${{ matrix.c_compiler }} diff --git a/.github/workflows/libc-overlay-tests.yml b/.github/workflows/libc-overlay-tests.yml index de4b58c008ee4..da82d8d9fe8ab 100644 --- a/.github/workflows/libc-overlay-tests.yml +++ b/.github/workflows/libc-overlay-tests.yml @@ -47,7 +47,7 @@ jobs: cpp_compiler: clang++ steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 # Libc's build is relatively small comparing with other components of LLVM. # A fresh linux overlay takes about 180MiB of uncompressed disk space, which can @@ -57,7 +57,7 @@ jobs: # Do not use direct GHAC access even though it is supported by sccache. 
GHAC rejects # frequent small object writes. - name: Setup ccache - uses: hendrikmuhs/ccache-action@v1 + uses: hendrikmuhs/ccache-action@a1209f81afb8c005c13b4296c32e363431bffea5 # v1.2.17 with: max-size: 1G key: libc_overlay_build_${{ matrix.os }}_${{ matrix.compiler.c_compiler }} diff --git a/.github/workflows/libclang-abi-tests.yml b/.github/workflows/libclang-abi-tests.yml index 41b3075288d2d..440af1691deed 100644 --- a/.github/workflows/libclang-abi-tests.yml +++ b/.github/workflows/libclang-abi-tests.yml @@ -27,7 +27,7 @@ concurrency: jobs: abi-dump-setup: if: github.repository_owner == 'llvm' - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 outputs: BASELINE_REF: ${{ steps.vars.outputs.BASELINE_REF }} ABI_HEADERS: ${{ steps.vars.outputs.ABI_HEADERS }} @@ -38,7 +38,7 @@ jobs: LLVM_VERSION_PATCH: ${{ steps.version.outputs.patch }} steps: - name: Checkout source - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 250 @@ -83,7 +83,7 @@ jobs: abi-dump: if: github.repository_owner == 'llvm' needs: abi-dump-setup - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 strategy: matrix: name: @@ -137,7 +137,7 @@ jobs: abi-compare: if: github.repository_owner == 'llvm' - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 needs: - abi-dump-setup - abi-dump diff --git a/.github/workflows/libcxx-build-and-test.yaml b/.github/workflows/libcxx-build-and-test.yaml index 93e673ca513a4..5d4394435890a 100644 --- a/.github/workflows/libcxx-build-and-test.yaml +++ b/.github/workflows/libcxx-build-and-test.yaml @@ -255,11 +255,11 @@ jobs: - name: Install a current LLVM if: ${{ matrix.mingw != true }} run: | - choco install -y llvm --version=18.1.6 --allow-downgrade + choco install -y llvm --version=19.1.7 --allow-downgrade - name: Install llvm-mingw if: ${{ matrix.mingw == true }} run: | - curl -LO https://github.com/mstorsjo/llvm-mingw/releases/download/20240606/llvm-mingw-20240606-ucrt-x86_64.zip + curl -LO 
https://github.com/mstorsjo/llvm-mingw/releases/download/20250114/llvm-mingw-20250114-ucrt-x86_64.zip powershell Expand-Archive llvm-mingw*.zip -DestinationPath . del llvm-mingw*.zip mv llvm-mingw* c:\llvm-mingw diff --git a/.github/workflows/libcxx-build-containers.yml b/.github/workflows/libcxx-build-containers.yml index bb4bd8843772f..b499af943c8e7 100644 --- a/.github/workflows/libcxx-build-containers.yml +++ b/.github/workflows/libcxx-build-containers.yml @@ -26,7 +26,7 @@ on: jobs: build-and-push: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 if: github.repository_owner == 'llvm' permissions: packages: write diff --git a/.github/workflows/libcxx-check-generated-files.yml b/.github/workflows/libcxx-check-generated-files.yml index 570055624b2a8..1f8103a114b9f 100644 --- a/.github/workflows/libcxx-check-generated-files.yml +++ b/.github/workflows/libcxx-check-generated-files.yml @@ -9,7 +9,7 @@ permissions: jobs: check_generated_files: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - name: Fetch LLVM sources uses: actions/checkout@v4 diff --git a/.github/workflows/libcxx-restart-preempted-jobs.yaml b/.github/workflows/libcxx-restart-preempted-jobs.yaml index e7e3772d4de22..7b341d7f22e41 100644 --- a/.github/workflows/libcxx-restart-preempted-jobs.yaml +++ b/.github/workflows/libcxx-restart-preempted-jobs.yaml @@ -26,7 +26,7 @@ jobs: statuses: read checks: write actions: write - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - name: "Restart Job" uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea #v7.0.1 @@ -161,7 +161,7 @@ jobs: statuses: read checks: write actions: write - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - name: "Restart Job (test)" uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea #v7.0.1 diff --git a/.github/workflows/llvm-bugs.yml b/.github/workflows/llvm-bugs.yml index c392078fa4525..5470662c97628 100644 --- a/.github/workflows/llvm-bugs.yml +++ b/.github/workflows/llvm-bugs.yml 
@@ -11,16 +11,16 @@ on: jobs: auto-subscribe: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 if: github.repository == 'llvm/llvm-project' steps: - - uses: actions/setup-node@v4 + - uses: actions/setup-node@1d0ff469b7ec7b3cb9d8673fde0c81c44821de2a # v4.2.0 with: node-version: 18 check-latest: true - run: npm install mailgun.js form-data - name: Send notification - uses: actions/github-script@v6 + uses: actions/github-script@d7906e4ad0b1822421a7e6a35d5ca353c962f410 # v6.4.1 env: MAILGUN_API_KEY: ${{ secrets.LLVM_BUGS_KEY }} with: diff --git a/.github/workflows/llvm-project-tests.yml b/.github/workflows/llvm-project-tests.yml index 4ff84c511250f..3da6743c49e25 100644 --- a/.github/workflows/llvm-project-tests.yml +++ b/.github/workflows/llvm-project-tests.yml @@ -14,7 +14,7 @@ on: required: false os_list: required: false - default: '["ubuntu-latest", "windows-2019", "macOS-13"]' + default: '["ubuntu-24.04", "windows-2019", "macOS-13"]' python_version: required: false type: string @@ -39,7 +39,7 @@ on: type: string # Use windows-2019 due to: # https://developercommunity.visualstudio.com/t/Prev-Issue---with-__assume-isnan-/1597317 - # Use ubuntu-22.04 rather than ubuntu-latest to match the ubuntu + # Use ubuntu-22.04 rather than ubuntu-24.04 to match the ubuntu # version in the CI container. Without this, setup-python tries # to install a python version linked against a newer version of glibc. # TODO(boomanaiden154): Bump the Ubuntu version once the version in the @@ -82,7 +82,7 @@ jobs: # lldb. Using this setup-python action to make 3.10 the default # python fixes this. - name: Setup Python - uses: actions/setup-python@v5 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: ${{ inputs.python_version }} - name: Install Ninja @@ -91,11 +91,11 @@ jobs: # actions/checkout deletes any existing files in the new git directory, # so this needs to either run before ccache-action or it has to use # clean: false. 
- - uses: actions/checkout@v4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 250 - name: Setup ccache - uses: hendrikmuhs/ccache-action@v1 + uses: hendrikmuhs/ccache-action@a1209f81afb8c005c13b4296c32e363431bffea5 # v1.2.17 with: # A full build of llvm, clang, lld, and lldb takes about 250MB # of ccache space. There's not much reason to have more than this, diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml index 9b3d49d4e99b9..464b4c726b966 100644 --- a/.github/workflows/llvm-tests.yml +++ b/.github/workflows/llvm-tests.yml @@ -27,7 +27,7 @@ concurrency: jobs: abi-dump-setup: if: github.repository_owner == 'llvm' - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 outputs: BASELINE_REF: ${{ steps.vars.outputs.BASELINE_REF }} ABI_HEADERS: ${{ steps.vars.outputs.ABI_HEADERS }} @@ -38,7 +38,7 @@ jobs: LLVM_VERSION_PATCH: ${{ steps.version.outputs.patch }} steps: - name: Checkout source - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 250 @@ -71,7 +71,7 @@ jobs: abi-dump: if: github.repository_owner == 'llvm' needs: abi-dump-setup - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 strategy: matrix: name: @@ -141,7 +141,7 @@ jobs: abi-compare: if: github.repository_owner == 'llvm' - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 needs: - abi-dump-setup - abi-dump diff --git a/.github/workflows/merged-prs.yml b/.github/workflows/merged-prs.yml index e29afd4097f9f..c771736389802 100644 --- a/.github/workflows/merged-prs.yml +++ b/.github/workflows/merged-prs.yml @@ -13,7 +13,7 @@ on: jobs: buildbot_comment: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 permissions: pull-requests: write if: >- @@ -21,7 +21,7 @@ jobs: (github.event.pull_request.merged == true) steps: - name: Checkout Automation Script - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: 
llvm/utils/git/ ref: main diff --git a/.github/workflows/new-issues.yml b/.github/workflows/new-issues.yml index 3cac57e268513..8480a657cc717 100644 --- a/.github/workflows/new-issues.yml +++ b/.github/workflows/new-issues.yml @@ -10,7 +10,7 @@ jobs: automate-issues-labels: permissions: issues: write - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 if: github.repository == 'llvm/llvm-project' steps: - uses: llvm/actions/issue-labeler@main diff --git a/.github/workflows/new-prs.yml b/.github/workflows/new-prs.yml index 88175d6f8d64d..935598e410dbb 100644 --- a/.github/workflows/new-prs.yml +++ b/.github/workflows/new-prs.yml @@ -16,7 +16,7 @@ on: jobs: greeter: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 permissions: pull-requests: write # Only comment on PRs that have been opened for the first time, by someone @@ -35,7 +35,7 @@ jobs: (github.event.pull_request.author_association != 'OWNER') steps: - name: Checkout Automation Script - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: llvm/utils/git/ ref: main @@ -56,7 +56,7 @@ jobs: automate-prs-labels: # Greet first so that only the author gets that notification. needs: greeter - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 # Ignore PRs with more than 10 commits. Pull requests with a lot of # commits tend to be accidents usually when someone made a mistake while trying # to rebase. 
We want to ignore these pull requests to avoid excessive @@ -67,7 +67,7 @@ jobs: github.event.pull_request.draft == false && github.event.pull_request.commits < 10 steps: - - uses: actions/labeler@v4 + - uses: actions/labeler@ac9175f8a1f3625fd0d4fb234536d26811351594 # v4.3.0 with: configuration-path: .github/new-prs-labeler.yml # workaround for https://github.com/actions/labeler/issues/112 diff --git a/.github/workflows/pr-code-format.yml b/.github/workflows/pr-code-format.yml index 0e6180acf4a46..0692455ae0345 100644 --- a/.github/workflows/pr-code-format.yml +++ b/.github/workflows/pr-code-format.yml @@ -11,7 +11,7 @@ on: jobs: code_formatter: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 timeout-minutes: 30 concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number }} @@ -19,12 +19,12 @@ jobs: if: github.repository == 'llvm/llvm-project' steps: - name: Fetch LLVM sources - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: ref: ${{ github.event.pull_request.head.sha }} - name: Checkout through merge base - uses: rmacklin/fetch-through-merge-base@v0 + uses: rmacklin/fetch-through-merge-base@bfe4d03a86f9afa52bc1a70e9814fc92a07f7b75 # v0.3.0 with: base_ref: ${{ github.event.pull_request.base.ref }} head_ref: ${{ github.event.pull_request.head.sha }} @@ -32,7 +32,7 @@ jobs: - name: Get changed files id: changed-files - uses: tj-actions/changed-files@v39 + uses: tj-actions/changed-files@fea790cb660e33aef4bdf07304e28fedd77dfa13 # v39.2.4 with: separator: "," skip_initial_fetch: true @@ -40,7 +40,7 @@ jobs: # We need to pull the script from the main branch, so that we ensure # we get the latest version of this script. 
- name: Fetch code formatting utils - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: ${{ github.repository }} ref: ${{ github.base_ref }} @@ -58,12 +58,12 @@ jobs: echo "$CHANGED_FILES" - name: Install clang-format - uses: aminya/setup-cpp@v1 + uses: aminya/setup-cpp@17c11551771948abc5752bbf3183482567c7caf0 # v1.1.1 with: clangformat: 19.1.6 - name: Setup Python env - uses: actions/setup-python@v5 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: '3.11' cache: 'pip' diff --git a/.github/workflows/pr-request-release-note.yml b/.github/workflows/pr-request-release-note.yml index 2fa501dda16bb..57425e04ec2f4 100644 --- a/.github/workflows/pr-request-release-note.yml +++ b/.github/workflows/pr-request-release-note.yml @@ -14,7 +14,7 @@ jobs: github.repository_owner == 'llvm' && startsWith(github.ref, 'refs/heads/release') - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: # We need to pull the script from the main branch, so that we ensure # we get the latest version of this script. 
diff --git a/.github/workflows/pr-subscriber.yml b/.github/workflows/pr-subscriber.yml index 272d3e2f9ef8a..f558da8a8fe0e 100644 --- a/.github/workflows/pr-subscriber.yml +++ b/.github/workflows/pr-subscriber.yml @@ -10,11 +10,11 @@ permissions: jobs: auto-subscribe: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 if: github.repository == 'llvm/llvm-project' steps: - name: Checkout Automation Script - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: llvm/utils/git/ ref: main diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml index 231dd26e54ae0..fd6a6cc931ec4 100644 --- a/.github/workflows/release-binaries.yml +++ b/.github/workflows/release-binaries.yml @@ -336,7 +336,7 @@ jobs: runs-on: ${{ needs.prepare.outputs.test-runs-on }} steps: - name: Checkout Actions - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: ref: ${{ (github.event_name == 'pull_request' && github.sha) || 'main' }} sparse-checkout: | diff --git a/.github/workflows/release-documentation.yml b/.github/workflows/release-documentation.yml index 09e21585bfc56..5a0aa063d32ac 100644 --- a/.github/workflows/release-documentation.yml +++ b/.github/workflows/release-documentation.yml @@ -29,7 +29,7 @@ on: jobs: release-documentation: name: Build and Upload Release Documentation - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 env: upload: ${{ inputs.upload && !contains(inputs.release-version, 'rc') }} steps: @@ -37,7 +37,7 @@ jobs: uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - name: Setup Python env - uses: actions/setup-python@v5 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: cache: 'pip' cache-dependency-path: './llvm/docs/requirements.txt' diff --git a/.github/workflows/release-doxygen.yml b/.github/workflows/release-doxygen.yml index ea95e5bb12b2b..d47c4337c07b2 
100644 --- a/.github/workflows/release-doxygen.yml +++ b/.github/workflows/release-doxygen.yml @@ -33,7 +33,7 @@ on: jobs: release-doxygen: name: Build and Upload Release Doxygen - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 permissions: contents: write env: @@ -43,7 +43,7 @@ jobs: uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - name: Setup Python env - uses: actions/setup-python@v5 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: cache: 'pip' cache-dependency-path: './llvm/docs/requirements.txt' diff --git a/.github/workflows/release-lit.yml b/.github/workflows/release-lit.yml index 9d6f3140e6883..9adeffb74d52a 100644 --- a/.github/workflows/release-lit.yml +++ b/.github/workflows/release-lit.yml @@ -25,7 +25,7 @@ on: jobs: release-lit: name: Release Lit - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - name: Checkout LLVM uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 @@ -45,7 +45,7 @@ jobs: ./llvm/utils/release/./github-upload-release.py --token "$GITHUB_TOKEN" --user ${{ github.actor }} --user-token "$USER_TOKEN" check-permissions - name: Setup Cpp - uses: aminya/setup-cpp@v1 + uses: aminya/setup-cpp@17c11551771948abc5752bbf3183482567c7caf0 # v1.1.1 with: compiler: llvm-16.0.6 cmake: true @@ -66,14 +66,14 @@ jobs: python3 setup.py sdist bdist_wheel - name: Upload lit to test.pypi.org - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4 with: password: ${{ secrets.LLVM_LIT_TEST_PYPI_API_TOKEN }} repository-url: https://test.pypi.org/legacy/ packages-dir: llvm/utils/lit/dist/ - name: Upload lit to pypi.org - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4 with: password: ${{ secrets.LLVM_LIT_PYPI_API_TOKEN }} packages-dir: llvm/utils/lit/dist/ diff --git a/.github/workflows/release-sources.yml 
b/.github/workflows/release-sources.yml index a6c86823f99df..99438918b56f0 100644 --- a/.github/workflows/release-sources.yml +++ b/.github/workflows/release-sources.yml @@ -47,7 +47,7 @@ jobs: outputs: ref: ${{ steps.inputs.outputs.ref }} export-args: ${{ steps.inputs.outputs.export-args }} - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - id: inputs run: | @@ -63,7 +63,7 @@ jobs: release-sources: name: Package Release Sources if: github.repository_owner == 'llvm' - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 needs: - inputs permissions: diff --git a/.github/workflows/release-tasks.yml b/.github/workflows/release-tasks.yml index 52076ea1821b0..d55098345d89e 100644 --- a/.github/workflows/release-tasks.yml +++ b/.github/workflows/release-tasks.yml @@ -12,7 +12,7 @@ on: jobs: validate-tag: name: Validate Tag - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 if: github.repository == 'llvm/llvm-project' outputs: release-version: ${{ steps.validate-tag.outputs.release-version }} @@ -26,7 +26,7 @@ jobs: release-create: name: Create a New Release - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 permissions: contents: write # For creating the release. needs: validate-tag diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index ff61cf83a6af3..db741b6405962 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -21,7 +21,7 @@ permissions: jobs: analysis: name: Scorecard analysis - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 if: github.repository == 'llvm/llvm-project' permissions: # Needed to upload the results to code-scanning dashboard. 
diff --git a/.github/workflows/version-check.yml b/.github/workflows/version-check.yml index 894e07d323ca9..a0a598094376f 100644 --- a/.github/workflows/version-check.yml +++ b/.github/workflows/version-check.yml @@ -14,10 +14,10 @@ permissions: jobs: version_check: if: github.repository_owner == 'llvm' - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - name: Fetch LLVM sources - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 8bec1db70e25a..485979f1a55a1 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -537,6 +537,17 @@ class BinaryContext { BinaryFunction *createInjectedBinaryFunction(const std::string &Name, bool IsSimple = true); + /// Patch the original binary contents at address \p Address with a sequence + /// of instructions from the \p Instructions list. The callee is responsible + /// for checking that the sequence doesn't cross any function or section + /// boundaries. + /// + /// Optional \p Name can be assigned to the patch. The name will be emitted to + /// the symbol table at \p Address. + BinaryFunction *createInstructionPatch(uint64_t Address, + InstructionListType &Instructions, + const Twine &Name = ""); + std::vector &getInjectedBinaryFunctions() { return InjectedBinaryFunctions; } diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 942840a7621fd..c9ccb69ab52c1 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -343,9 +343,6 @@ class BinaryFunction { /// True if the function uses ORC format for stack unwinding. bool HasORC{false}; - /// True if the original entry point was patched. 
- bool IsPatched{false}; - /// True if the function contains explicit or implicit indirect branch to its /// split fragments, e.g., split jump table, landing pad in split fragment bool HasIndirectTargetToSplitFragment{false}; @@ -1376,9 +1373,6 @@ class BinaryFunction { /// Return true if the function uses ORC format for stack unwinding. bool hasORC() const { return HasORC; } - /// Return true if the original entry point was patched. - bool isPatched() const { return IsPatched; } - const JumpTable *getJumpTable(const MCInst &Inst) const { const uint64_t Address = BC.MIB->getJumpTable(Inst); return getJumpTableContainingAddress(Address); @@ -1729,8 +1723,6 @@ class BinaryFunction { /// Mark function that should not be emitted. void setIgnored(); - void setIsPatched(bool V) { IsPatched = V; } - void setHasIndirectTargetToSplitFragment(bool V) { HasIndirectTargetToSplitFragment = V; } diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index fbb853656fb91..1d45a314a17b6 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -637,10 +637,6 @@ class MCPlusBuilder { return false; } - virtual void getADRReg(const MCInst &Inst, MCPhysReg &RegName) const { - llvm_unreachable("not implemented"); - } - virtual bool isMoveMem2Reg(const MCInst &Inst) const { return false; } virtual bool mayLoad(const MCInst &Inst) const { @@ -1538,6 +1534,13 @@ class MCPlusBuilder { llvm_unreachable("not implemented"); } + /// Undo the linker's ADRP+ADD to ADR relaxation. Take \p ADRInst and return + /// ADRP+ADD instruction sequence. + virtual InstructionListType undoAdrpAddRelaxation(const MCInst &ADRInst, + MCContext *Ctx) const { + llvm_unreachable("not implemented"); + } + /// Return not 0 if the instruction CurInst, in combination with the recent /// history of disassembled instructions supplied by [Begin, End), is a linker /// generated veneer/stub that needs patching. 
This happens in AArch64 when diff --git a/bolt/include/bolt/Passes/ContinuityStats.h b/bolt/include/bolt/Passes/ContinuityStats.h deleted file mode 100644 index bd4d491ad4a55..0000000000000 --- a/bolt/include/bolt/Passes/ContinuityStats.h +++ /dev/null @@ -1,61 +0,0 @@ -//===- bolt/Passes/ContinuityStats.h ----------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass checks how well the BOLT input profile satisfies the following -// "CFG continuity" property of a perfect profile: -// -// Each positive-execution-count block in the function’s CFG -// should be *reachable* from a positive-execution-count function -// entry block through a positive-execution-count path. -// -// More specifically, for each of the hottest 1000 functions, the pass -// calculates the function’s fraction of basic block execution counts -// that is *unreachable*. It then reports the 95th percentile of the -// distribution of the 1000 unreachable fractions in a single BOLT-INFO line. -// The smaller the reported value is, the better the BOLT profile -// satisfies the CFG continuity property. - -// The default value of 1000 above can be changed via the hidden BOLT option -// `-num-functions-for-continuity-check=[N]`. -// If more detailed stats are needed, `-v=1` can be used: the hottest N -// functions will be grouped into 5 equally-sized buckets, from the hottest -// to the coldest; for each bucket, various summary statistics of the -// distribution of the unreachable fractions and the raw unreachable execution -// counts will be reported. 
-// -//===----------------------------------------------------------------------===// - -#ifndef BOLT_PASSES_CONTINUITYSTATS_H -#define BOLT_PASSES_CONTINUITYSTATS_H - -#include "bolt/Passes/BinaryPasses.h" -#include - -namespace llvm { - -class raw_ostream; - -namespace bolt { -class BinaryContext; - -/// Compute and report to the user the function CFG continuity quality -class PrintContinuityStats : public BinaryFunctionPass { -public: - explicit PrintContinuityStats(const cl::opt &PrintPass) - : BinaryFunctionPass(PrintPass) {} - - bool shouldOptimize(const BinaryFunction &BF) const override; - const char *getName() const override { return "continuity-stats"; } - bool shouldPrint(const BinaryFunction &) const override { return false; } - Error runOnFunctions(BinaryContext &BC) override; -}; - -} // namespace bolt -} // namespace llvm - -#endif // BOLT_PASSES_CONTINUITYSTATS_H diff --git a/bolt/include/bolt/Passes/PatchEntries.h b/bolt/include/bolt/Passes/PatchEntries.h index fa6b5811a4c3b..04ec9165c2ff2 100644 --- a/bolt/include/bolt/Passes/PatchEntries.h +++ b/bolt/include/bolt/Passes/PatchEntries.h @@ -26,8 +26,6 @@ class PatchEntries : public BinaryFunctionPass { struct Patch { const MCSymbol *Symbol; uint64_t Address; - uint64_t FileOffset; - BinarySection *Section; }; public: diff --git a/bolt/include/bolt/Passes/ProfileQualityStats.h b/bolt/include/bolt/Passes/ProfileQualityStats.h new file mode 100644 index 0000000000000..86fc88cefc10e --- /dev/null +++ b/bolt/include/bolt/Passes/ProfileQualityStats.h @@ -0,0 +1,98 @@ +//===- bolt/Passes/ProfileQualityStats.h ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass checks the BOLT input profile quality. 
+// +// Check 1: how well the input profile satisfies the following +// "CFG continuity" property of a perfect profile: +// +// Each positive-execution-count block in the function’s CFG +// is *reachable* from a positive-execution-count function +// entry block through a positive-execution-count path. +// +// More specifically, for each of the hottest 1000 functions, the pass +// calculates the function’s fraction of basic block execution counts +// that is *unreachable*. It then reports the 95th percentile of the +// distribution of the 1000 unreachable fractions in a single BOLT-INFO line. +// The smaller the reported value is, the better the BOLT profile +// satisfies the CFG continuity property. +// +// Check 2: how well the input profile satisfies the "call graph flow +// conservation" property of a perfect profile: +// +// For each function that is not a program entry, the number of times the +// function is called is equal to the net CFG outflow of the +// function's entry block(s). +// +// More specifically, for each of the hottest 1000 functions, the pass obtains +// A = number of times the function is called, B = the function's entry blocks' +// inflow, C = the function's entry blocks' outflow, where B and C are computed +// using the function's weighted CFG. It then computes gap = 1 - MIN(A,C-B) / +// MAX(A, C-B). The pass reports the 95th percentile of the distribution of the +// 1000 gaps in a single BOLT-INFO line. The smaller the reported value is, the +// better the BOLT profile satisfies the call graph flow conservation property. +// +// Check 3: how well the input profile satisfies the "function CFG flow +// conservation property" of a perfect profile: +// +// A non-entry non-exit basic block's inflow is equal to its outflow. 
+// +// More specifically, for each of the hottest 1000 functions, the pass loops +// over its basic blocks that are non-entry and non-exit, and for each block +// obtains a block gap = 1 - MIN(block inflow, block outflow, block call count +// if any) / MAX(block inflow, block outflow, block call count if any). It then +// aggregates the block gaps into 2 values for the function: "weighted" is the +// weighted average of the block conservation gaps, where the weights depend on +// each block's execution count and instruction count; "worst" is the worst +// (biggest) block gap across all basic blocks in the function with an execution +// count of > 500. The pass then reports the 95th percentile of the weighted and +// worst values of the 1000 functions in a single BOLT-INFO line. The smaller +// the reported values are, the better the BOLT profile satisfies the function +// CFG flow conservation property. +// +// The default value of 1000 above can be changed via the hidden BOLT option +// `-top-functions-for-profile-quality-check=[N]`. +// The default reporting of the 95th percentile can be changed via the hidden +// BOLT option `-percentile-for-profile-quality-check=[M]`. +// +// If more detailed stats are needed, `-v=1` can be used: the hottest N +// functions will be grouped into 5 equally-sized buckets, from the hottest +// to the coldest; for each bucket, various summary statistics of the +// profile quality will be reported.
+// +//===----------------------------------------------------------------------===// + +#ifndef BOLT_PASSES_PROFILEQUALITYSTATS_H +#define BOLT_PASSES_PROFILEQUALITYSTATS_H + +#include "bolt/Passes/BinaryPasses.h" +#include + +namespace llvm { + +class raw_ostream; + +namespace bolt { +class BinaryContext; + +/// Compute and report to the user the profile quality +class PrintProfileQualityStats : public BinaryFunctionPass { +public: + explicit PrintProfileQualityStats(const cl::opt &PrintPass) + : BinaryFunctionPass(PrintPass) {} + + bool shouldOptimize(const BinaryFunction &BF) const override; + const char *getName() const override { return "profile-quality-stats"; } + bool shouldPrint(const BinaryFunction &) const override { return false; } + Error runOnFunctions(BinaryContext &BC) override; +}; + +} // namespace bolt +} // namespace llvm + +#endif // BOLT_PASSES_PROFILEQUALITYSTATS_H diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index f9fc536f3569a..2045b9bd5d8a5 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -46,9 +46,10 @@ using namespace llvm; namespace opts { -cl::opt NoHugePages("no-huge-pages", - cl::desc("use regular size pages for code alignment"), - cl::Hidden, cl::cat(BoltCategory)); +static cl::opt + NoHugePages("no-huge-pages", + cl::desc("use regular size pages for code alignment"), + cl::Hidden, cl::cat(BoltCategory)); static cl::opt PrintDebugInfo("print-debug-info", @@ -2400,6 +2401,32 @@ BinaryContext::createInjectedBinaryFunction(const std::string &Name, return BF; } +BinaryFunction *BinaryContext::createInstructionPatch( + uint64_t Address, InstructionListType &Instructions, const Twine &Name) { + ErrorOr Section = getSectionForAddress(Address); + assert(Section && "cannot get section for patching"); + assert(Section->hasSectionRef() && Section->isText() && + "can only patch input file code sections"); + + const uint64_t FileOffset = + Section->getInputFileOffset() + 
Address - Section->getAddress(); + + std::string PatchName = Name.str(); + if (PatchName.empty()) { + // Assign unique name to the patch. + static uint64_t N = 0; + PatchName = "__BP_" + std::to_string(N++); + } + + BinaryFunction *PBF = createInjectedBinaryFunction(PatchName); + PBF->setOutputAddress(Address); + PBF->setFileOffset(FileOffset); + PBF->setOriginSection(&Section.get()); + PBF->addBasicBlock()->addInstructions(Instructions); + + return PBF; +} + std::pair BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { // Adjust branch instruction to match the current layout. diff --git a/bolt/lib/Core/BinaryData.cpp b/bolt/lib/Core/BinaryData.cpp index e9ddf08d8695f..88f13eafd614d 100644 --- a/bolt/lib/Core/BinaryData.cpp +++ b/bolt/lib/Core/BinaryData.cpp @@ -24,7 +24,7 @@ namespace opts { extern cl::OptionCategory BoltCategory; extern cl::opt Verbosity; -cl::opt +static cl::opt PrintSymbolAliases("print-aliases", cl::desc("print aliases when printing objects"), cl::Hidden, cl::cat(BoltCategory)); diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index ff5eb5cf6e1eb..35617a92c1b2a 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -67,7 +67,7 @@ extern cl::opt Verbosity; extern bool processAllFunctions(); -cl::opt CheckEncoding( +static cl::opt CheckEncoding( "check-encoding", cl::desc("perform verification of LLVM instruction encoding/decoding. " "Every instruction in the input is decoded and re-encoded. 
" @@ -144,14 +144,11 @@ cl::opt cl::desc("print time spent constructing binary functions"), cl::Hidden, cl::cat(BoltCategory)); -cl::opt -TrapOnAVX512("trap-avx512", - cl::desc("in relocation mode trap upon entry to any function that uses " - "AVX-512 instructions"), - cl::init(false), - cl::ZeroOrMore, - cl::Hidden, - cl::cat(BoltCategory)); +static cl::opt TrapOnAVX512( + "trap-avx512", + cl::desc("in relocation mode trap upon entry to any function that uses " + "AVX-512 instructions"), + cl::init(false), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); bool shouldPrint(const BinaryFunction &Function) { if (Function.isIgnored()) @@ -1333,6 +1330,22 @@ Error BinaryFunction::disassemble() { BC.printInstruction(BC.errs(), Instruction, AbsoluteInstrAddr); BC.errs() << '\n'; } + + // Verify that we've symbolized an operand if the instruction has a + // relocation against it. + if (getRelocationInRange(Offset, Offset + Size)) { + bool HasSymbolicOp = false; + for (MCOperand &Op : Instruction) { + if (Op.isExpr()) { + HasSymbolicOp = true; + break; + } + } + if (!HasSymbolicOp) + return createFatalBOLTError( + "expected symbolized operand for instruction at 0x" + + Twine::utohexstr(AbsoluteInstrAddr)); + } } // Special handling for AVX-512 instructions. @@ -1440,9 +1453,8 @@ Error BinaryFunction::disassemble() { if (BC.isAArch64()) handleAArch64IndirectCall(Instruction, Offset); } - } else if (BC.isAArch64() || BC.isRISCV()) { + } else if (BC.isRISCV()) { // Check if there's a relocation associated with this instruction. 
- bool UsedReloc = false; for (auto Itr = Relocations.lower_bound(Offset), ItrE = Relocations.lower_bound(Offset + Size); Itr != ItrE; ++Itr) { @@ -1467,24 +1479,6 @@ Error BinaryFunction::disassemble() { Relocation.Type); (void)Result; assert(Result && "cannot replace immediate with relocation"); - - // For aarch64, if we replaced an immediate with a symbol from a - // relocation, we mark it so we do not try to further process a - // pc-relative operand. All we need is the symbol. - UsedReloc = true; - } - - if (!BC.isRISCV() && MIB->hasPCRelOperand(Instruction) && !UsedReloc) { - if (auto NewE = handleErrors( - handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size), - [&](const BOLTError &E) -> Error { - if (E.isFatal()) - return Error(std::make_unique(std::move(E))); - if (!E.getMessage().empty()) - E.log(BC.errs()); - return Error::success(); - })) - return Error(std::move(NewE)); } } @@ -1544,15 +1538,11 @@ MCSymbol *BinaryFunction::registerBranch(uint64_t Src, uint64_t Dst) { } void BinaryFunction::analyzeInstructionForFuncReference(const MCInst &Inst) { - for (const MCOperand &Op : MCPlus::primeOperands(Inst)) { - if (!Op.isExpr()) - continue; - const MCExpr &Expr = *Op.getExpr(); - if (Expr.getKind() != MCExpr::SymbolRef) + for (unsigned OpNum = 0; OpNum < MCPlus::getNumPrimeOperands(Inst); ++OpNum) { + const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Inst, OpNum); + if (!Symbol) continue; - const MCSymbol &Symbol = cast(Expr).getSymbol(); - // Set HasAddressTaken for a function regardless of the ICF level. 
- if (BinaryFunction *BF = BC.getFunctionForSymbol(&Symbol)) + if (BinaryFunction *BF = BC.getFunctionForSymbol(Symbol)) BF->setHasAddressTaken(true); } } @@ -1584,8 +1574,9 @@ bool BinaryFunction::scanExternalRefs() { assert(FunctionData.size() == getMaxSize() && "function size does not match raw data size"); - BC.SymbolicDisAsm->setSymbolizer( - BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false)); + if (BC.isX86()) + BC.SymbolicDisAsm->setSymbolizer( + BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false)); // Disassemble contents of the function. Detect code entry points and create // relocations for references to code that will be moved. diff --git a/bolt/lib/Core/Relocation.cpp b/bolt/lib/Core/Relocation.cpp index e9a9741bc3716..523ab8480cc90 100644 --- a/bolt/lib/Core/Relocation.cpp +++ b/bolt/lib/Core/Relocation.cpp @@ -271,22 +271,11 @@ static bool skipRelocationProcessAArch64(uint64_t &Type, uint64_t Contents) { return (Contents & 0xfc000000) == 0x14000000; }; - auto IsAdr = [](uint64_t Contents) -> bool { - // The bits 31-24 are 0b0xx10000 - return (Contents & 0x9f000000) == 0x10000000; - }; - auto IsAddImm = [](uint64_t Contents) -> bool { // The bits 30-23 are 0b00100010 return (Contents & 0x7F800000) == 0x11000000; }; - auto IsNop = [](uint64_t Contents) -> bool { return Contents == 0xd503201f; }; - - // The linker might eliminate the instruction and replace it with NOP, ignore - if (IsNop(Contents)) - return true; - // The linker might relax ADRP+LDR instruction sequence for loading symbol // address from GOT table to ADRP+ADD sequence that would point to the // binary-local symbol. Change relocation type in order to process it right. 
@@ -332,18 +321,6 @@ static bool skipRelocationProcessAArch64(uint64_t &Type, uint64_t Contents) { } } - // The linker might relax ADRP+ADD or ADRP+LDR sequences to the ADR+NOP - switch (Type) { - default: - break; - case ELF::R_AARCH64_ADR_PREL_PG_HI21: - case ELF::R_AARCH64_ADD_ABS_LO12_NC: - case ELF::R_AARCH64_ADR_GOT_PAGE: - case ELF::R_AARCH64_LD64_GOT_LO12_NC: - if (IsAdr(Contents)) - return true; - } - return false; } diff --git a/bolt/lib/Passes/ADRRelaxationPass.cpp b/bolt/lib/Passes/ADRRelaxationPass.cpp index 52811edcb8273..4b37a061ac12d 100644 --- a/bolt/lib/Passes/ADRRelaxationPass.cpp +++ b/bolt/lib/Passes/ADRRelaxationPass.cpp @@ -71,14 +71,10 @@ void ADRRelaxationPass::runOnFunction(BinaryFunction &BF) { continue; } - MCPhysReg Reg; - BC.MIB->getADRReg(Inst, Reg); - int64_t Addend = BC.MIB->getTargetAddend(Inst); - InstructionListType Addr; - + InstructionListType AdrpAdd; { auto L = BC.scopeLock(); - Addr = BC.MIB->materializeAddress(Symbol, BC.Ctx.get(), Reg, Addend); + AdrpAdd = BC.MIB->undoAdrpAddRelaxation(Inst, BC.Ctx.get()); } if (It != BB.begin() && BC.MIB->isNoop(*std::prev(It))) { @@ -99,7 +95,7 @@ void ADRRelaxationPass::runOnFunction(BinaryFunction &BF) { PassFailed = true; return; } - It = BB.replaceInstruction(It, Addr); + It = BB.replaceInstruction(It, AdrpAdd); } } } diff --git a/bolt/lib/Passes/Aligner.cpp b/bolt/lib/Passes/Aligner.cpp index 555f82a5a8178..c3ddedaaa1466 100644 --- a/bolt/lib/Passes/Aligner.cpp +++ b/bolt/lib/Passes/Aligner.cpp @@ -25,15 +25,12 @@ extern cl::opt AlignBlocks; extern cl::opt PreserveBlocksAlignment; extern cl::opt AlignFunctions; -cl::opt -AlignBlocksMinSize("align-blocks-min-size", - cl::desc("minimal size of the basic block that should be aligned"), - cl::init(0), - cl::ZeroOrMore, - cl::Hidden, - cl::cat(BoltOptCategory)); - -cl::opt AlignBlocksThreshold( +static cl::opt AlignBlocksMinSize( + "align-blocks-min-size", + cl::desc("minimal size of the basic block that should be aligned"), + 
cl::init(0), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); + +static cl::opt AlignBlocksThreshold( "align-blocks-threshold", cl::desc( "align only blocks with frequency larger than containing function " @@ -42,19 +39,17 @@ cl::opt AlignBlocksThreshold( "containing function."), cl::init(800), cl::Hidden, cl::cat(BoltOptCategory)); -cl::opt AlignFunctionsMaxBytes( +static cl::opt AlignFunctionsMaxBytes( "align-functions-max-bytes", cl::desc("maximum number of bytes to use to align functions"), cl::init(32), cl::cat(BoltOptCategory)); -cl::opt -BlockAlignment("block-alignment", - cl::desc("boundary to use for alignment of basic blocks"), - cl::init(16), - cl::ZeroOrMore, - cl::cat(BoltOptCategory)); +static cl::opt + BlockAlignment("block-alignment", + cl::desc("boundary to use for alignment of basic blocks"), + cl::init(16), cl::ZeroOrMore, cl::cat(BoltOptCategory)); -cl::opt +static cl::opt UseCompactAligner("use-compact-aligner", cl::desc("Use compact approach for aligning functions"), cl::init(true), cl::cat(BoltOptCategory)); diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt index adc91658050a6..3864255a09ebe 100644 --- a/bolt/lib/Passes/CMakeLists.txt +++ b/bolt/lib/Passes/CMakeLists.txt @@ -27,7 +27,7 @@ add_llvm_library(LLVMBOLTPasses PatchEntries.cpp PettisAndHansen.cpp PLTCall.cpp - ContinuityStats.cpp + ProfileQualityStats.cpp RegAnalysis.cpp RegReAssign.cpp ReorderAlgorithm.cpp diff --git a/bolt/lib/Passes/ContinuityStats.cpp b/bolt/lib/Passes/ContinuityStats.cpp deleted file mode 100644 index b32365b59065d..0000000000000 --- a/bolt/lib/Passes/ContinuityStats.cpp +++ /dev/null @@ -1,250 +0,0 @@ -//===- bolt/Passes/ContinuityStats.cpp --------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the continuity stats calculation pass. -// -//===----------------------------------------------------------------------===// - -#include "bolt/Passes/ContinuityStats.h" -#include "bolt/Core/BinaryBasicBlock.h" -#include "bolt/Core/BinaryFunction.h" -#include "bolt/Utils/CommandLineOpts.h" -#include "llvm/Support/CommandLine.h" -#include -#include -#include - -#define DEBUG_TYPE "bolt-opts" - -using namespace llvm; -using namespace bolt; - -namespace opts { -extern cl::opt Verbosity; -cl::opt NumFunctionsForContinuityCheck( - "num-functions-for-continuity-check", - cl::desc("number of hottest functions to print aggregated " - "CFG discontinuity stats of."), - cl::init(1000), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); -} // namespace opts - -namespace { -using FunctionListType = std::vector; -using function_iterator = FunctionListType::iterator; - -template -void printDistribution(raw_ostream &OS, std::vector &values, - bool Fraction = false) { - if (values.empty()) - return; - // Sort values from largest to smallest and print the MAX, TOP 1%, 5%, 10%, - // 20%, 50%, 80%, MIN. If Fraction is true, then values are printed as - // fractions instead of integers. 
- std::sort(values.begin(), values.end()); - - auto printLine = [&](std::string Text, double Percent) { - int Rank = int(values.size() * (1.0 - Percent / 100)); - if (Percent == 0) - Rank = values.size() - 1; - if (Fraction) - OS << " " << Text << std::string(9 - Text.length(), ' ') << ": " - << format("%.2lf%%", values[Rank] * 100) << "\n"; - else - OS << " " << Text << std::string(9 - Text.length(), ' ') << ": " - << values[Rank] << "\n"; - }; - - printLine("MAX", 0); - const int percentages[] = {1, 5, 10, 20, 50, 80}; - for (size_t i = 0; i < sizeof(percentages) / sizeof(percentages[0]); ++i) { - printLine("TOP " + std::to_string(percentages[i]) + "%", percentages[i]); - } - printLine("MIN", 100); -} - -void printCFGContinuityStats(raw_ostream &OS, - iterator_range &Functions) { - // Given a perfect profile, every positive-execution-count BB should be - // connected to an entry of the function through a positive-execution-count - // directed path in the control flow graph. - std::vector NumUnreachables; - std::vector SumECUnreachables; - std::vector FractionECUnreachables; - - for (auto it = Functions.begin(); it != Functions.end(); ++it) { - const BinaryFunction *Function = *it; - if (Function->size() <= 1) - continue; - - // Compute the sum of all BB execution counts (ECs). - size_t NumPosECBBs = 0; - size_t SumAllBBEC = 0; - for (const BinaryBasicBlock &BB : *Function) { - const size_t BBEC = BB.getKnownExecutionCount(); - NumPosECBBs += BBEC > 0 ? 1 : 0; - SumAllBBEC += BBEC; - } - - // Perform BFS on subgraph of CFG induced by positive weight edges. - // Compute the number of BBs reachable from the entry(s) of the function and - // the sum of their execution counts (ECs). - std::unordered_map IndexToBB; - std::unordered_set Visited; - std::queue Queue; - for (const BinaryBasicBlock &BB : *Function) { - // Make sure BB.getIndex() is not already in IndexToBB. 
- assert(IndexToBB.find(BB.getIndex()) == IndexToBB.end()); - IndexToBB[BB.getIndex()] = &BB; - if (BB.isEntryPoint() && BB.getKnownExecutionCount() > 0) { - Queue.push(BB.getIndex()); - Visited.insert(BB.getIndex()); - } - } - while (!Queue.empty()) { - const unsigned BBIndex = Queue.front(); - const BinaryBasicBlock *BB = IndexToBB[BBIndex]; - Queue.pop(); - auto SuccBIIter = BB->branch_info_begin(); - for (const BinaryBasicBlock *Succ : BB->successors()) { - const uint64_t Count = SuccBIIter->Count; - if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0) { - ++SuccBIIter; - continue; - } - if (!Visited.insert(Succ->getIndex()).second) { - ++SuccBIIter; - continue; - } - Queue.push(Succ->getIndex()); - ++SuccBIIter; - } - } - - const size_t NumReachableBBs = Visited.size(); - - // Loop through Visited, and sum the corresponding BBs' execution counts - // (ECs). - size_t SumReachableBBEC = 0; - for (const unsigned BBIndex : Visited) { - const BinaryBasicBlock *BB = IndexToBB[BBIndex]; - SumReachableBBEC += BB->getKnownExecutionCount(); - } - - const size_t NumPosECBBsUnreachableFromEntry = - NumPosECBBs - NumReachableBBs; - const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC; - const double FractionECUnreachable = - (double)SumUnreachableBBEC / SumAllBBEC; - - if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) { - OS << "Non-trivial CFG discontinuity observed in function " - << Function->getPrintName() << "\n"; - LLVM_DEBUG(Function->dump()); - } - - NumUnreachables.push_back(NumPosECBBsUnreachableFromEntry); - SumECUnreachables.push_back(SumUnreachableBBEC); - FractionECUnreachables.push_back(FractionECUnreachable); - } - - if (FractionECUnreachables.empty()) - return; - - std::sort(FractionECUnreachables.begin(), FractionECUnreachables.end()); - const int Rank = int(FractionECUnreachables.size() * 0.95); - OS << format("top 5%% function CFG discontinuity is %.2lf%%\n", - FractionECUnreachables[Rank] * 100); - - if (opts::Verbosity >= 
1) { - OS << "abbreviations: EC = execution count, POS BBs = positive EC BBs\n" - << "distribution of NUM(unreachable POS BBs) among all focal " - "functions\n"; - printDistribution(OS, NumUnreachables); - - OS << "distribution of SUM_EC(unreachable POS BBs) among all focal " - "functions\n"; - printDistribution(OS, SumECUnreachables); - - OS << "distribution of [(SUM_EC(unreachable POS BBs) / SUM_EC(all " - "POS BBs))] among all focal functions\n"; - printDistribution(OS, FractionECUnreachables, /*Fraction=*/true); - } -} - -void printAll(BinaryContext &BC, FunctionListType &ValidFunctions, - size_t NumTopFunctions) { - // Sort the list of functions by execution counts (reverse). - llvm::sort(ValidFunctions, - [&](const BinaryFunction *A, const BinaryFunction *B) { - return A->getKnownExecutionCount() > B->getKnownExecutionCount(); - }); - - const size_t RealNumTopFunctions = - std::min(NumTopFunctions, ValidFunctions.size()); - - iterator_range Functions( - ValidFunctions.begin(), ValidFunctions.begin() + RealNumTopFunctions); - - BC.outs() << format("BOLT-INFO: among the hottest %zu functions ", - RealNumTopFunctions); - printCFGContinuityStats(BC.outs(), Functions); - - // Print more detailed bucketed stats if requested. - if (opts::Verbosity >= 1 && RealNumTopFunctions >= 5) { - const size_t PerBucketSize = RealNumTopFunctions / 5; - BC.outs() << format( - "Detailed stats for 5 buckets, each with %zu functions:\n", - PerBucketSize); - - // For each bucket, print the CFG continuity stats of the functions in the - // bucket. 
- for (size_t BucketIndex = 0; BucketIndex < 5; ++BucketIndex) { - const size_t StartIndex = BucketIndex * PerBucketSize; - const size_t EndIndex = StartIndex + PerBucketSize; - iterator_range Functions( - ValidFunctions.begin() + StartIndex, - ValidFunctions.begin() + EndIndex); - const size_t MaxFunctionExecutionCount = - ValidFunctions[StartIndex]->getKnownExecutionCount(); - const size_t MinFunctionExecutionCount = - ValidFunctions[EndIndex - 1]->getKnownExecutionCount(); - BC.outs() << format("----------------\n| Bucket %zu: " - "|\n----------------\n", - BucketIndex + 1) - << format( - "execution counts of the %zu functions in the bucket: " - "%zu-%zu\n", - EndIndex - StartIndex, MinFunctionExecutionCount, - MaxFunctionExecutionCount); - printCFGContinuityStats(BC.outs(), Functions); - } - } -} -} // namespace - -bool PrintContinuityStats::shouldOptimize(const BinaryFunction &BF) const { - if (BF.empty() || !BF.hasValidProfile()) - return false; - - return BinaryFunctionPass::shouldOptimize(BF); -} - -Error PrintContinuityStats::runOnFunctions(BinaryContext &BC) { - // Create a list of functions with valid profiles. 
- FunctionListType ValidFunctions; - for (const auto &BFI : BC.getBinaryFunctions()) { - const BinaryFunction *Function = &BFI.second; - if (PrintContinuityStats::shouldOptimize(*Function)) - ValidFunctions.push_back(Function); - } - if (ValidFunctions.empty() || opts::NumFunctionsForContinuityCheck == 0) - return Error::success(); - - printAll(BC, ValidFunctions, opts::NumFunctionsForContinuityCheck); - return Error::success(); -} diff --git a/bolt/lib/Passes/FrameOptimizer.cpp b/bolt/lib/Passes/FrameOptimizer.cpp index 1c0f9555f9eb9..81d4d9367f58c 100644 --- a/bolt/lib/Passes/FrameOptimizer.cpp +++ b/bolt/lib/Passes/FrameOptimizer.cpp @@ -43,7 +43,7 @@ FrameOptimization("frame-opt", cl::ZeroOrMore, cl::cat(BoltOptCategory)); -cl::opt RemoveStores( +static cl::opt RemoveStores( "frame-opt-rm-stores", cl::init(FOP_NONE), cl::desc("apply additional analysis to remove stores (experimental)"), cl::cat(BoltOptCategory)); diff --git a/bolt/lib/Passes/PLTCall.cpp b/bolt/lib/Passes/PLTCall.cpp index 31c2d92ebc204..90b5f586a7bad 100644 --- a/bolt/lib/Passes/PLTCall.cpp +++ b/bolt/lib/Passes/PLTCall.cpp @@ -22,22 +22,16 @@ namespace opts { extern cl::OptionCategory BoltOptCategory; -cl::opt -PLT("plt", - cl::desc("optimize PLT calls (requires linking with -znow)"), - cl::init(bolt::PLTCall::OT_NONE), - cl::values(clEnumValN(bolt::PLTCall::OT_NONE, - "none", - "do not optimize PLT calls"), - clEnumValN(bolt::PLTCall::OT_HOT, - "hot", - "optimize executed (hot) PLT calls"), - clEnumValN(bolt::PLTCall::OT_ALL, - "all", - "optimize all PLT calls")), - cl::ZeroOrMore, - cl::cat(BoltOptCategory)); - +static cl::opt + PLT("plt", cl::desc("optimize PLT calls (requires linking with -znow)"), + cl::init(bolt::PLTCall::OT_NONE), + cl::values(clEnumValN(bolt::PLTCall::OT_NONE, "none", + "do not optimize PLT calls"), + clEnumValN(bolt::PLTCall::OT_HOT, "hot", + "optimize executed (hot) PLT calls"), + clEnumValN(bolt::PLTCall::OT_ALL, "all", + "optimize all PLT calls")), + 
cl::ZeroOrMore, cl::cat(BoltOptCategory)); } namespace llvm { diff --git a/bolt/lib/Passes/PatchEntries.cpp b/bolt/lib/Passes/PatchEntries.cpp index 981d1b70af907..4ce9c09b311df 100644 --- a/bolt/lib/Passes/PatchEntries.cpp +++ b/bolt/lib/Passes/PatchEntries.cpp @@ -83,9 +83,8 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) { return false; } - PendingPatches.emplace_back(Patch{Symbol, Function.getAddress() + Offset, - Function.getFileOffset() + Offset, - Function.getOriginSection()}); + PendingPatches.emplace_back( + Patch{Symbol, Function.getAddress() + Offset}); NextValidByte = Offset + PatchSize; if (NextValidByte > Function.getMaxSize()) { if (opts::Verbosity >= 1) @@ -118,16 +117,12 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) { } for (Patch &Patch : PendingPatches) { - BinaryFunction *PatchFunction = BC.createInjectedBinaryFunction( + // Add instruction patch to the binary. + InstructionListType Instructions; + BC.MIB->createLongTailCall(Instructions, Patch.Symbol, BC.Ctx.get()); + BinaryFunction *PatchFunction = BC.createInstructionPatch( + Patch.Address, Instructions, NameResolver::append(Patch.Symbol->getName(), ".org.0")); - // Force the function to be emitted at the given address. - PatchFunction->setOutputAddress(Patch.Address); - PatchFunction->setFileOffset(Patch.FileOffset); - PatchFunction->setOriginSection(Patch.Section); - - InstructionListType Seq; - BC.MIB->createLongTailCall(Seq, Patch.Symbol, BC.Ctx.get()); - PatchFunction->addBasicBlock()->addInstructions(Seq); // Verify the size requirements. 
uint64_t HotSize, ColdSize; @@ -135,8 +130,6 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) { assert(!ColdSize && "unexpected cold code"); assert(HotSize <= PatchSize && "max patch size exceeded"); } - - Function.setIsPatched(true); } return Error::success(); } diff --git a/bolt/lib/Passes/ProfileQualityStats.cpp b/bolt/lib/Passes/ProfileQualityStats.cpp new file mode 100644 index 0000000000000..332c78da8a1e3 --- /dev/null +++ b/bolt/lib/Passes/ProfileQualityStats.cpp @@ -0,0 +1,579 @@ +//===- bolt/Passes/ProfileQualityStats.cpp ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the profile quality stats calculation pass. +// +//===----------------------------------------------------------------------===// + +#include "bolt/Passes/ProfileQualityStats.h" +#include "bolt/Core/BinaryBasicBlock.h" +#include "bolt/Core/BinaryFunction.h" +#include "bolt/Utils/CommandLineOpts.h" +#include "llvm/Support/CommandLine.h" +#include +#include +#include + +using namespace llvm; +using namespace bolt; + +namespace opts { +extern cl::opt Verbosity; +static cl::opt TopFunctionsForProfileQualityCheck( + "top-functions-for-profile-quality-check", + cl::desc("number of hottest functions to print aggregated " + "profile quality stats of."), + cl::init(1000), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); +static cl::opt PercentileForProfileQualityCheck( + "percentile-for-profile-quality-check", + cl::desc("Percentile of profile quality distributions over hottest " + "functions to report."), + cl::init(95), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); +} // namespace opts + +namespace { +using FunctionListType = std::vector; +using function_iterator = 
FunctionListType::iterator; + +// Function number -> vector of flows for BBs in the function +using TotalFlowMapTy = std::unordered_map>; +// Function number -> flow count +using FunctionFlowMapTy = std::unordered_map; +struct FlowInfo { + TotalFlowMapTy TotalIncomingFlows; + TotalFlowMapTy TotalOutgoingFlows; + TotalFlowMapTy TotalMaxCountMaps; + TotalFlowMapTy TotalMinCountMaps; + FunctionFlowMapTy CallGraphIncomingFlows; +}; + +template +void printDistribution(raw_ostream &OS, std::vector &values, + bool Fraction = false) { + // Assume values are sorted. + if (values.empty()) + return; + + OS << " Length : " << values.size() << "\n"; + + auto printLine = [&](std::string Text, double Percent) { + int Rank = int(values.size() * (100 - Percent) / 100); + if (Percent == 0) + Rank = values.size() - 1; + if (Fraction) + OS << " " << Text << std::string(11 - Text.length(), ' ') << ": " + << formatv("{0:P}", values[Rank]) << "\n"; + else + OS << " " << Text << std::string(11 - Text.length(), ' ') << ": " + << values[Rank] << "\n"; + }; + + printLine("MAX", 0); + const int percentages[] = {1, 5, 10, 20, 50, 80}; + for (size_t i = 0; i < sizeof(percentages) / sizeof(percentages[0]); ++i) { + printLine("TOP " + std::to_string(percentages[i]) + "%", percentages[i]); + } + printLine("MIN", 100); +} + +void printCFGContinuityStats(raw_ostream &OS, + iterator_range &Functions) { + // Given a perfect profile, every positive-execution-count BB should be + // connected to an entry of the function through a positive-execution-count + // directed path in the control flow graph. + std::vector NumUnreachables; + std::vector SumECUnreachables; + std::vector FractionECUnreachables; + + for (const BinaryFunction *Function : Functions) { + if (Function->size() <= 1) + continue; + + // Compute the sum of all BB execution counts (ECs). 
+ size_t NumPosECBBs = 0; + size_t SumAllBBEC = 0; + for (const BinaryBasicBlock &BB : *Function) { + const size_t BBEC = BB.getKnownExecutionCount(); + NumPosECBBs += !!BBEC; + SumAllBBEC += BBEC; + } + + // Perform BFS on subgraph of CFG induced by positive weight edges. + // Compute the number of BBs reachable from the entry(s) of the function and + // the sum of their execution counts (ECs). + std::unordered_set Visited; + std::queue Queue; + size_t SumReachableBBEC = 0; + + Function->forEachEntryPoint([&](uint64_t Offset, const MCSymbol *Label) { + const BinaryBasicBlock *EntryBB = Function->getBasicBlockAtOffset(Offset); + if (!EntryBB || EntryBB->getKnownExecutionCount() == 0) + return true; + Queue.push(EntryBB->getLayoutIndex()); + Visited.insert(EntryBB->getLayoutIndex()); + SumReachableBBEC += EntryBB->getKnownExecutionCount(); + return true; + }); + + const FunctionLayout &Layout = Function->getLayout(); + + while (!Queue.empty()) { + const unsigned BBIndex = Queue.front(); + const BinaryBasicBlock *BB = Layout.getBlock(BBIndex); + Queue.pop(); + for (const auto &[Succ, BI] : + llvm::zip(BB->successors(), BB->branch_info())) { + const uint64_t Count = BI.Count; + if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0 || + !Visited.insert(Succ->getLayoutIndex()).second) + continue; + SumReachableBBEC += Succ->getKnownExecutionCount(); + Queue.push(Succ->getLayoutIndex()); + } + } + + const size_t NumReachableBBs = Visited.size(); + + const size_t NumPosECBBsUnreachableFromEntry = + NumPosECBBs - NumReachableBBs; + const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC; + const double FractionECUnreachable = + (double)SumUnreachableBBEC / SumAllBBEC; + + if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) { + OS << "Non-trivial CFG discontinuity observed in function " + << Function->getPrintName() << "\n"; + if (opts::Verbosity >= 3) + Function->dump(); + } + + NumUnreachables.push_back(NumPosECBBsUnreachableFromEntry); + 
SumECUnreachables.push_back(SumUnreachableBBEC); + FractionECUnreachables.push_back(FractionECUnreachable); + } + + if (FractionECUnreachables.empty()) + return; + + llvm::sort(FractionECUnreachables); + const int Rank = int(FractionECUnreachables.size() * + opts::PercentileForProfileQualityCheck / 100); + OS << formatv("function CFG discontinuity {0:P}; ", + FractionECUnreachables[Rank]); + if (opts::Verbosity >= 1) { + OS << "\nabbreviations: EC = execution count, POS BBs = positive EC BBs\n" + << "distribution of NUM(unreachable POS BBs) per function\n"; + llvm::sort(NumUnreachables); + printDistribution(OS, NumUnreachables); + + OS << "distribution of SUM_EC(unreachable POS BBs) per function\n"; + llvm::sort(SumECUnreachables); + printDistribution(OS, SumECUnreachables); + + OS << "distribution of [(SUM_EC(unreachable POS BBs) / SUM_EC(all " + "POS BBs))] per function\n"; + printDistribution(OS, FractionECUnreachables, /*Fraction=*/true); + } +} + +void printCallGraphFlowConservationStats( + raw_ostream &OS, iterator_range &Functions, + FlowInfo &TotalFlowMap) { + std::vector CallGraphGaps; + + for (const BinaryFunction *Function : Functions) { + if (Function->size() <= 1 || !Function->isSimple()) + continue; + + const uint64_t FunctionNum = Function->getFunctionNumber(); + std::vector &IncomingFlows = + TotalFlowMap.TotalIncomingFlows[FunctionNum]; + std::vector &OutgoingFlows = + TotalFlowMap.TotalOutgoingFlows[FunctionNum]; + FunctionFlowMapTy &CallGraphIncomingFlows = + TotalFlowMap.CallGraphIncomingFlows; + + // Only consider functions that are not a program entry. 
+ if (CallGraphIncomingFlows.find(FunctionNum) != + CallGraphIncomingFlows.end()) { + uint64_t EntryInflow = 0; + uint64_t EntryOutflow = 0; + uint32_t NumConsideredEntryBlocks = 0; + + Function->forEachEntryPoint([&](uint64_t Offset, const MCSymbol *Label) { + const BinaryBasicBlock *EntryBB = + Function->getBasicBlockAtOffset(Offset); + if (!EntryBB || EntryBB->succ_size() == 0) + return true; + NumConsideredEntryBlocks++; + EntryInflow += IncomingFlows[EntryBB->getLayoutIndex()]; + EntryOutflow += OutgoingFlows[EntryBB->getLayoutIndex()]; + return true; + }); + + uint64_t NetEntryOutflow = 0; + if (EntryOutflow < EntryInflow) { + if (opts::Verbosity >= 2) { + // We expect entry blocks' CFG outflow >= inflow, i.e., it has a + // non-negative net outflow. If this is not the case, then raise a + // warning if requested. + OS << "BOLT WARNING: unexpected entry block CFG outflow < inflow " + "in function " + << Function->getPrintName() << "\n"; + if (opts::Verbosity >= 3) + Function->dump(); + } + } else { + NetEntryOutflow = EntryOutflow - EntryInflow; + } + if (NumConsideredEntryBlocks > 0) { + const uint64_t CallGraphInflow = + TotalFlowMap.CallGraphIncomingFlows[Function->getFunctionNumber()]; + const uint64_t Min = std::min(NetEntryOutflow, CallGraphInflow); + const uint64_t Max = std::max(NetEntryOutflow, CallGraphInflow); + const double CallGraphGap = 1 - (double)Min / Max; + + if (opts::Verbosity >= 2 && CallGraphGap >= 0.5) { + OS << "Nontrivial call graph gap of size " + << formatv("{0:P}", CallGraphGap) << " observed in function " + << Function->getPrintName() << "\n"; + if (opts::Verbosity >= 3) + Function->dump(); + } + + CallGraphGaps.push_back(CallGraphGap); + } + } + } + + if (CallGraphGaps.empty()) + return; + + llvm::sort(CallGraphGaps); + const int Rank = + int(CallGraphGaps.size() * opts::PercentileForProfileQualityCheck / 100); + OS << formatv("call graph flow conservation gap {0:P}; ", + CallGraphGaps[Rank]); + if (opts::Verbosity >= 1) { + OS 
<< "\ndistribution of function entry flow conservation gaps\n"; + printDistribution(OS, CallGraphGaps, /*Fraction=*/true); + } +} + +void printCFGFlowConservationStats(raw_ostream &OS, + iterator_range &Functions, + FlowInfo &TotalFlowMap) { + std::vector CFGGapsWeightedAvg; + std::vector CFGGapsWorst; + std::vector CFGGapsWorstAbs; + // We only consider blocks with execution counts > MinBlockCount when + // reporting the distribution of worst gaps. + const uint16_t MinBlockCount = 500; + for (const BinaryFunction *Function : Functions) { + if (Function->size() <= 1 || !Function->isSimple()) + continue; + + const uint64_t FunctionNum = Function->getFunctionNumber(); + std::vector &MaxCountMaps = + TotalFlowMap.TotalMaxCountMaps[FunctionNum]; + std::vector &MinCountMaps = + TotalFlowMap.TotalMinCountMaps[FunctionNum]; + double WeightedGapSum = 0.0; + double WeightSum = 0.0; + double WorstGap = 0.0; + uint64_t WorstGapAbs = 0; + BinaryBasicBlock *BBWorstGap = nullptr; + BinaryBasicBlock *BBWorstGapAbs = nullptr; + for (BinaryBasicBlock &BB : *Function) { + // We don't consider function entry or exit blocks for CFG flow + // conservation + if (BB.isEntryPoint() || BB.succ_size() == 0) + continue; + + const uint64_t Max = MaxCountMaps[BB.getLayoutIndex()]; + const uint64_t Min = MinCountMaps[BB.getLayoutIndex()]; + const double Gap = 1 - (double)Min / Max; + double Weight = BB.getKnownExecutionCount() * BB.getNumNonPseudos(); + if (Weight == 0) + continue; + // We use log to prevent the stats from being dominated by extremely hot + // blocks + Weight = log(Weight); + WeightedGapSum += Gap * Weight; + WeightSum += Weight; + if (BB.getKnownExecutionCount() > MinBlockCount && Gap > WorstGap) { + WorstGap = Gap; + BBWorstGap = &BB; + } + if (BB.getKnownExecutionCount() > MinBlockCount && + Max - Min > WorstGapAbs) { + WorstGapAbs = Max - Min; + BBWorstGapAbs = &BB; + } + } + if (WeightSum > 0) { + const double WeightedGap = WeightedGapSum / WeightSum; + if (opts::Verbosity 
>= 2 && (WeightedGap >= 0.1 || WorstGap >= 0.9)) { + OS << "Nontrivial CFG gap observed in function " + << Function->getPrintName() << "\n" + << "Weighted gap: " << formatv("{0:P}", WeightedGap) << "\n"; + if (BBWorstGap) + OS << "Worst gap: " << formatv("{0:P}", WorstGap) + << " at BB with input offset: 0x" + << Twine::utohexstr(BBWorstGap->getInputOffset()) << "\n"; + if (BBWorstGapAbs) + OS << "Worst gap (absolute value): " << WorstGapAbs << " at BB with " + << "input offset 0x" + << Twine::utohexstr(BBWorstGapAbs->getInputOffset()) << "\n"; + if (opts::Verbosity >= 3) + Function->dump(); + } + + CFGGapsWeightedAvg.push_back(WeightedGap); + CFGGapsWorst.push_back(WorstGap); + CFGGapsWorstAbs.push_back(WorstGapAbs); + } + } + + if (CFGGapsWeightedAvg.empty()) + return; + llvm::sort(CFGGapsWeightedAvg); + const int RankWA = int(CFGGapsWeightedAvg.size() * + opts::PercentileForProfileQualityCheck / 100); + llvm::sort(CFGGapsWorst); + const int RankW = + int(CFGGapsWorst.size() * opts::PercentileForProfileQualityCheck / 100); + OS << formatv("CFG flow conservation gap {0:P} (weighted) {1:P} (worst)\n", + CFGGapsWeightedAvg[RankWA], CFGGapsWorst[RankW]); + if (opts::Verbosity >= 1) { + OS << "distribution of weighted CFG flow conservation gaps\n"; + printDistribution(OS, CFGGapsWeightedAvg, /*Fraction=*/true); + OS << format("Consider only blocks with execution counts > %zu:\n", + MinBlockCount) + << "distribution of worst block flow conservation gap per " + "function \n"; + printDistribution(OS, CFGGapsWorst, /*Fraction=*/true); + OS << "distribution of worst block flow conservation gap (absolute " + "value) per function\n"; + llvm::sort(CFGGapsWorstAbs); + printDistribution(OS, CFGGapsWorstAbs, /*Fraction=*/false); + } +} + +void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) { + // Increment block inflow and outflow with CFG jump counts. 
+ TotalFlowMapTy &TotalIncomingFlows = TotalFlowMap.TotalIncomingFlows; + TotalFlowMapTy &TotalOutgoingFlows = TotalFlowMap.TotalOutgoingFlows; + for (const auto &BFI : BC.getBinaryFunctions()) { + const BinaryFunction *Function = &BFI.second; + std::vector &IncomingFlows = + TotalIncomingFlows[Function->getFunctionNumber()]; + std::vector &OutgoingFlows = + TotalOutgoingFlows[Function->getFunctionNumber()]; + const uint64_t NumBlocks = Function->size(); + IncomingFlows.resize(NumBlocks, 0); + OutgoingFlows.resize(NumBlocks, 0); + if (Function->empty() || !Function->hasValidProfile()) + continue; + for (const BinaryBasicBlock &BB : *Function) { + uint64_t TotalOutgoing = 0ULL; + for (const auto &[Succ, BI] : + llvm::zip(BB.successors(), BB.branch_info())) { + const uint64_t Count = BI.Count; + if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0) + continue; + TotalOutgoing += Count; + IncomingFlows[Succ->getLayoutIndex()] += Count; + } + OutgoingFlows[BB.getLayoutIndex()] = TotalOutgoing; + } + } + // Initialize TotalMaxCountMaps and TotalMinCountMaps using + // TotalIncomingFlows and TotalOutgoingFlows + TotalFlowMapTy &TotalMaxCountMaps = TotalFlowMap.TotalMaxCountMaps; + TotalFlowMapTy &TotalMinCountMaps = TotalFlowMap.TotalMinCountMaps; + for (const auto &BFI : BC.getBinaryFunctions()) { + const BinaryFunction *Function = &BFI.second; + uint64_t FunctionNum = Function->getFunctionNumber(); + std::vector &IncomingFlows = TotalIncomingFlows[FunctionNum]; + std::vector &OutgoingFlows = TotalOutgoingFlows[FunctionNum]; + std::vector &MaxCountMap = TotalMaxCountMaps[FunctionNum]; + std::vector &MinCountMap = TotalMinCountMaps[FunctionNum]; + const uint64_t NumBlocks = Function->size(); + MaxCountMap.resize(NumBlocks, 0); + MinCountMap.resize(NumBlocks, 0); + if (Function->empty() || !Function->hasValidProfile()) + continue; + for (const BinaryBasicBlock &BB : *Function) { + uint64_t BBNum = BB.getLayoutIndex(); + MaxCountMap[BBNum] = 
std::max(IncomingFlows[BBNum], OutgoingFlows[BBNum]); + MinCountMap[BBNum] = std::min(IncomingFlows[BBNum], OutgoingFlows[BBNum]); + } + } + + // Modify TotalMaxCountMaps and TotalMinCountMaps using call counts and + // fill out CallGraphIncomingFlows + FunctionFlowMapTy &CallGraphIncomingFlows = + TotalFlowMap.CallGraphIncomingFlows; + for (const auto &BFI : BC.getBinaryFunctions()) { + const BinaryFunction *Function = &BFI.second; + uint64_t FunctionNum = Function->getFunctionNumber(); + std::vector &MaxCountMap = TotalMaxCountMaps[FunctionNum]; + std::vector &MinCountMap = TotalMinCountMaps[FunctionNum]; + + // Update MaxCountMap, MinCountMap, and CallGraphIncomingFlows + auto recordCall = [&](const BinaryBasicBlock *SourceBB, + const MCSymbol *DestSymbol, uint64_t Count, + uint64_t TotalCallCount) { + if (Count == BinaryBasicBlock::COUNT_NO_PROFILE) + Count = 0; + const BinaryFunction *DstFunc = + DestSymbol ? BC.getFunctionForSymbol(DestSymbol) : nullptr; + if (DstFunc) + CallGraphIncomingFlows[DstFunc->getFunctionNumber()] += Count; + if (SourceBB) { + unsigned BlockIndex = SourceBB->getLayoutIndex(); + MaxCountMap[BlockIndex] = + std::max(MaxCountMap[BlockIndex], TotalCallCount); + MinCountMap[BlockIndex] = + std::min(MinCountMap[BlockIndex], TotalCallCount); + } + }; + + // Get pairs of (symbol, count) for each target at this callsite. + // If the call is to an unknown function the symbol will be nullptr. + // If there is no profiling data the count will be COUNT_NO_PROFILE. 
+ using TargetDesc = std::pair; + using CallInfoTy = std::vector; + auto getCallInfo = [&](const BinaryBasicBlock *BB, const MCInst &Inst) { + CallInfoTy Counts; + const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst); + + if (!DstSym && BC.MIB->hasAnnotation(Inst, "CallProfile")) { + for (const auto &CSI : BC.MIB->getAnnotationAs( + Inst, "CallProfile")) + if (CSI.Symbol) + Counts.emplace_back(CSI.Symbol, CSI.Count); + } else { + const uint64_t Count = BB->getExecutionCount(); + Counts.emplace_back(DstSym, Count); + } + + return Counts; + }; + + // If the function has an invalid profile, try to use the perf data + // directly. The call EC is only used to update CallGraphIncomingFlows. + if (!Function->hasValidProfile() && !Function->getAllCallSites().empty()) { + for (const IndirectCallProfile &CSI : Function->getAllCallSites()) + if (CSI.Symbol) + recordCall(nullptr, CSI.Symbol, CSI.Count, CSI.Count); + continue; + } else { + // If the function has a valid profile + for (const BinaryBasicBlock &BB : *Function) { + for (const MCInst &Inst : BB) { + if (!BC.MIB->isCall(Inst)) + continue; + // Find call instructions and extract target symbols from each + // one. + const CallInfoTy CallInfo = getCallInfo(&BB, Inst); + // We need the total call count to update MaxCountMap and + // MinCountMap in recordCall for indirect calls + uint64_t TotalCallCount = 0; + for (const TargetDesc &CI : CallInfo) + TotalCallCount += CI.second; + for (const TargetDesc &CI : CallInfo) + recordCall(&BB, CI.first, CI.second, TotalCallCount); + } + } + } + } +} + +void printAll(BinaryContext &BC, FunctionListType &ValidFunctions, + size_t NumTopFunctions) { + // Sort the list of functions by execution counts (reverse). 
+ llvm::sort(ValidFunctions, + [&](const BinaryFunction *A, const BinaryFunction *B) { + return A->getKnownExecutionCount() > B->getKnownExecutionCount(); + }); + + const size_t RealNumTopFunctions = + std::min(NumTopFunctions, ValidFunctions.size()); + + iterator_range Functions( + ValidFunctions.begin(), ValidFunctions.begin() + RealNumTopFunctions); + + FlowInfo TotalFlowMap; + computeFlowMappings(BC, TotalFlowMap); + + BC.outs() << format("BOLT-INFO: profile quality metrics for the hottest %zu " + "functions (reporting top %zu%% values): ", + RealNumTopFunctions, + 100 - opts::PercentileForProfileQualityCheck); + printCFGContinuityStats(BC.outs(), Functions); + printCallGraphFlowConservationStats(BC.outs(), Functions, TotalFlowMap); + printCFGFlowConservationStats(BC.outs(), Functions, TotalFlowMap); + + // Print more detailed bucketed stats if requested. + if (opts::Verbosity >= 1 && RealNumTopFunctions >= 5) { + const size_t PerBucketSize = RealNumTopFunctions / 5; + BC.outs() << format( + "Detailed stats for 5 buckets, each with %zu functions:\n", + PerBucketSize); + + // For each bucket, print the CFG continuity stats of the functions in + // the bucket. 
+ for (size_t BucketIndex = 0; BucketIndex < 5; ++BucketIndex) { + const size_t StartIndex = BucketIndex * PerBucketSize; + const size_t EndIndex = StartIndex + PerBucketSize; + iterator_range Functions( + ValidFunctions.begin() + StartIndex, + ValidFunctions.begin() + EndIndex); + const size_t MaxFunctionExecutionCount = + ValidFunctions[StartIndex]->getKnownExecutionCount(); + const size_t MinFunctionExecutionCount = + ValidFunctions[EndIndex - 1]->getKnownExecutionCount(); + BC.outs() << format("----------------\n| Bucket %zu: " + "|\n----------------\n", + BucketIndex + 1) + << format( + "execution counts of the %zu functions in the bucket: " + "%zu-%zu\n", + EndIndex - StartIndex, MinFunctionExecutionCount, + MaxFunctionExecutionCount); + printCFGContinuityStats(BC.outs(), Functions); + printCallGraphFlowConservationStats(BC.outs(), Functions, TotalFlowMap); + printCFGFlowConservationStats(BC.outs(), Functions, TotalFlowMap); + } + } +} +} // namespace + +bool PrintProfileQualityStats::shouldOptimize(const BinaryFunction &BF) const { + if (BF.empty() || !BF.hasValidProfile()) + return false; + + return BinaryFunctionPass::shouldOptimize(BF); +} + +Error PrintProfileQualityStats::runOnFunctions(BinaryContext &BC) { + // Create a list of functions with valid profiles. 
+ FunctionListType ValidFunctions; + for (const auto &BFI : BC.getBinaryFunctions()) { + const BinaryFunction *Function = &BFI.second; + if (PrintProfileQualityStats::shouldOptimize(*Function)) + ValidFunctions.push_back(Function); + } + if (ValidFunctions.empty() || opts::TopFunctionsForProfileQualityCheck == 0) + return Error::success(); + + printAll(BC, ValidFunctions, opts::TopFunctionsForProfileQualityCheck); + return Error::success(); +} diff --git a/bolt/lib/Passes/RetpolineInsertion.cpp b/bolt/lib/Passes/RetpolineInsertion.cpp index 171177d9e9333..f8702893a222b 100644 --- a/bolt/lib/Passes/RetpolineInsertion.cpp +++ b/bolt/lib/Passes/RetpolineInsertion.cpp @@ -33,19 +33,17 @@ namespace opts { extern cl::OptionCategory BoltCategory; -llvm::cl::opt InsertRetpolines("insert-retpolines", - cl::desc("run retpoline insertion pass"), - cl::cat(BoltCategory)); - -llvm::cl::opt -RetpolineLfence("retpoline-lfence", - cl::desc("determine if lfence instruction should exist in the retpoline"), - cl::init(true), - cl::ZeroOrMore, - cl::Hidden, - cl::cat(BoltCategory)); - -cl::opt R11Availability( +static llvm::cl::opt + InsertRetpolines("insert-retpolines", + cl::desc("run retpoline insertion pass"), + cl::cat(BoltCategory)); + +static llvm::cl::opt RetpolineLfence( + "retpoline-lfence", + cl::desc("determine if lfence instruction should exist in the retpoline"), + cl::init(true), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); + +static cl::opt R11Availability( "r11-availability", cl::desc("determine the availability of r11 before indirect branches"), cl::init(RetpolineInsertion::AvailabilityOptions::NEVER), diff --git a/bolt/lib/Passes/StokeInfo.cpp b/bolt/lib/Passes/StokeInfo.cpp index dd033508d200c..9da460a2877c9 100644 --- a/bolt/lib/Passes/StokeInfo.cpp +++ b/bolt/lib/Passes/StokeInfo.cpp @@ -21,7 +21,7 @@ using namespace llvm; using namespace bolt; namespace opts { -cl::OptionCategory StokeOptCategory("STOKE pass options"); +static cl::OptionCategory 
StokeOptCategory("STOKE pass options"); static cl::opt StokeOutputDataFilename("stoke-out", diff --git a/bolt/lib/Passes/TailDuplication.cpp b/bolt/lib/Passes/TailDuplication.cpp index 463ea49527fa6..354f9b78830c3 100644 --- a/bolt/lib/Passes/TailDuplication.cpp +++ b/bolt/lib/Passes/TailDuplication.cpp @@ -26,7 +26,7 @@ namespace opts { extern cl::OptionCategory BoltOptCategory; extern cl::opt NoThreads; -cl::opt TailDuplicationMode( +static cl::opt TailDuplicationMode( "tail-duplication", cl::desc("duplicate unconditional branches that cross a cache line"), cl::init(bolt::TailDuplication::TD_NONE), diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp index b66a3f478f1a7..1a61949d77472 100644 --- a/bolt/lib/Profile/StaleProfileMatching.cpp +++ b/bolt/lib/Profile/StaleProfileMatching.cpp @@ -52,66 +52,66 @@ cl::opt cl::desc("Infer counts from stale profile data."), cl::init(false), cl::Hidden, cl::cat(BoltOptCategory)); -cl::opt StaleMatchingMinMatchedBlock( +static cl::opt StaleMatchingMinMatchedBlock( "stale-matching-min-matched-block", cl::desc("Percentage threshold of matched basic blocks at which stale " "profile inference is executed."), cl::init(0), cl::Hidden, cl::cat(BoltOptCategory)); -cl::opt StaleMatchingMaxFuncSize( +static cl::opt StaleMatchingMaxFuncSize( "stale-matching-max-func-size", cl::desc("The maximum size of a function to consider for inference."), cl::init(10000), cl::Hidden, cl::cat(BoltOptCategory)); // Parameters of the profile inference algorithm. The default values are tuned // on several benchmarks. 
-cl::opt StaleMatchingEvenFlowDistribution( +static cl::opt StaleMatchingEvenFlowDistribution( "stale-matching-even-flow-distribution", cl::desc("Try to evenly distribute flow when there are multiple equally " "likely options."), cl::init(true), cl::ReallyHidden, cl::cat(BoltOptCategory)); -cl::opt StaleMatchingRebalanceUnknown( +static cl::opt StaleMatchingRebalanceUnknown( "stale-matching-rebalance-unknown", cl::desc("Evenly re-distribute flow among unknown subgraphs."), cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory)); -cl::opt StaleMatchingJoinIslands( +static cl::opt StaleMatchingJoinIslands( "stale-matching-join-islands", cl::desc("Join isolated components having positive flow."), cl::init(true), cl::ReallyHidden, cl::cat(BoltOptCategory)); -cl::opt StaleMatchingCostBlockInc( +static cl::opt StaleMatchingCostBlockInc( "stale-matching-cost-block-inc", cl::desc("The cost of increasing a block count by one."), cl::init(150), cl::ReallyHidden, cl::cat(BoltOptCategory)); -cl::opt StaleMatchingCostBlockDec( +static cl::opt StaleMatchingCostBlockDec( "stale-matching-cost-block-dec", cl::desc("The cost of decreasing a block count by one."), cl::init(150), cl::ReallyHidden, cl::cat(BoltOptCategory)); -cl::opt StaleMatchingCostJumpInc( +static cl::opt StaleMatchingCostJumpInc( "stale-matching-cost-jump-inc", cl::desc("The cost of increasing a jump count by one."), cl::init(150), cl::ReallyHidden, cl::cat(BoltOptCategory)); -cl::opt StaleMatchingCostJumpDec( +static cl::opt StaleMatchingCostJumpDec( "stale-matching-cost-jump-dec", cl::desc("The cost of decreasing a jump count by one."), cl::init(150), cl::ReallyHidden, cl::cat(BoltOptCategory)); -cl::opt StaleMatchingCostBlockUnknownInc( +static cl::opt StaleMatchingCostBlockUnknownInc( "stale-matching-cost-block-unknown-inc", cl::desc("The cost of increasing an unknown block count by one."), cl::init(1), cl::ReallyHidden, cl::cat(BoltOptCategory)); -cl::opt StaleMatchingCostJumpUnknownInc( +static cl::opt 
StaleMatchingCostJumpUnknownInc( "stale-matching-cost-jump-unknown-inc", cl::desc("The cost of increasing an unknown jump count by one."), cl::init(140), cl::ReallyHidden, cl::cat(BoltOptCategory)); -cl::opt StaleMatchingCostJumpUnknownFTInc( +static cl::opt StaleMatchingCostJumpUnknownFTInc( "stale-matching-cost-jump-unknown-ft-inc", cl::desc( "The cost of increasing an unknown fall-through jump count by one."), diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index e3872b373f417..f5636bfe3e1f1 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -28,7 +28,7 @@ extern cl::OptionCategory BoltOptCategory; extern cl::opt InferStaleProfile; extern cl::opt Lite; -cl::opt NameSimilarityFunctionMatchingThreshold( +static cl::opt NameSimilarityFunctionMatchingThreshold( "name-similarity-function-matching-threshold", cl::desc("Match functions using namespace and edit distance"), cl::init(0), cl::Hidden, cl::cat(BoltOptCategory)); @@ -38,11 +38,11 @@ static llvm::cl::opt cl::desc("ignore hash while reading function profile"), cl::Hidden, cl::cat(BoltOptCategory)); -llvm::cl::opt +static llvm::cl::opt MatchProfileWithFunctionHash("match-profile-with-function-hash", cl::desc("Match profile with function hash"), cl::Hidden, cl::cat(BoltOptCategory)); -llvm::cl::opt +static llvm::cl::opt MatchWithCallGraph("match-with-call-graph", cl::desc("Match functions with call graph"), cl::Hidden, cl::cat(BoltOptCategory)); diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index 2d851c751ae10..dd48653931eb9 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -12,7 +12,6 @@ #include "bolt/Passes/AllocCombiner.h" #include "bolt/Passes/AsmDump.h" #include "bolt/Passes/CMOVConversion.h" -#include "bolt/Passes/ContinuityStats.h" #include "bolt/Passes/FixRISCVCallsPass.h" #include "bolt/Passes/FixRelaxationPass.h" 
#include "bolt/Passes/FrameOptimizer.h" @@ -27,6 +26,7 @@ #include "bolt/Passes/MCF.h" #include "bolt/Passes/PLTCall.h" #include "bolt/Passes/PatchEntries.h" +#include "bolt/Passes/ProfileQualityStats.h" #include "bolt/Passes/RegReAssign.h" #include "bolt/Passes/ReorderData.h" #include "bolt/Passes/ReorderFunctions.h" @@ -379,7 +379,7 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { if (opts::PrintProfileStats) Manager.registerPass(std::make_unique(NeverPrint)); - Manager.registerPass(std::make_unique(NeverPrint)); + Manager.registerPass(std::make_unique(NeverPrint)); Manager.registerPass(std::make_unique(NeverPrint)); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index a97762063eb1e..9a2d228718283 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -91,9 +91,10 @@ extern cl::opt ICF; -cl::opt AllowStripped("allow-stripped", - cl::desc("allow processing of stripped binaries"), - cl::Hidden, cl::cat(BoltCategory)); +static cl::opt + AllowStripped("allow-stripped", + cl::desc("allow processing of stripped binaries"), cl::Hidden, + cl::cat(BoltCategory)); static cl::opt ForceToDataRelocations( "force-data-relocations", @@ -101,7 +102,7 @@ static cl::opt ForceToDataRelocations( cl::Hidden, cl::cat(BoltCategory)); -cl::opt +static cl::opt BoltID("bolt-id", cl::desc("add any string to tag this execution in the " "output binary via bolt info section"), @@ -175,9 +176,10 @@ cl::opt PrintAll("print-all", cl::desc("print functions after each stage"), cl::Hidden, cl::cat(BoltCategory)); -cl::opt PrintProfile("print-profile", - cl::desc("print functions after attaching profile"), - cl::Hidden, cl::cat(BoltCategory)); +static cl::opt + PrintProfile("print-profile", + cl::desc("print functions after attaching profile"), + cl::Hidden, cl::cat(BoltCategory)); cl::opt PrintCFG("print-cfg", cl::desc("print functions after CFG construction"), @@ -218,11 +220,10 @@ 
SkipFunctionNamesFile("skip-funcs-file", cl::Hidden, cl::cat(BoltCategory)); -cl::opt -TrapOldCode("trap-old-code", - cl::desc("insert traps in old function bodies (relocation mode)"), - cl::Hidden, - cl::cat(BoltCategory)); +static cl::opt TrapOldCode( + "trap-old-code", + cl::desc("insert traps in old function bodies (relocation mode)"), + cl::Hidden, cl::cat(BoltCategory)); static cl::opt DWPPathName("dwp", cl::desc("Path and name to DWP file."), diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index f79d5a747246e..685b2279e5afb 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "AArch64MCSymbolizer.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "MCTargetDesc/AArch64FixupKinds.h" #include "MCTargetDesc/AArch64MCExpr.h" @@ -134,6 +135,12 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { public: using MCPlusBuilder::MCPlusBuilder; + std::unique_ptr + createTargetSymbolizer(BinaryFunction &Function, + bool CreateNewSymbols) const override { + return std::make_unique(Function, CreateNewSymbols); + } + MCPhysReg getStackPointer() const override { return AArch64::SP; } MCPhysReg getFramePointer() const override { return AArch64::FP; } @@ -278,13 +285,23 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { return Inst.getOpcode() == AArch64::ADDXri; } - void getADRReg(const MCInst &Inst, MCPhysReg &RegName) const override { + MCPhysReg getADRReg(const MCInst &Inst) const { assert((isADR(Inst) || isADRP(Inst)) && "Not an ADR instruction"); assert(MCPlus::getNumPrimeOperands(Inst) != 0 && "No operands for ADR instruction"); assert(Inst.getOperand(0).isReg() && "Unexpected operand in ADR instruction"); - RegName = Inst.getOperand(0).getReg(); + return Inst.getOperand(0).getReg(); + } + + InstructionListType 
undoAdrpAddRelaxation(const MCInst &ADRInst, + MCContext *Ctx) const override { + assert(isADR(ADRInst) && "ADR instruction expected"); + + const MCPhysReg Reg = getADRReg(ADRInst); + const MCSymbol *Target = getTargetSymbol(ADRInst); + const uint64_t Addend = getTargetAddend(ADRInst); + return materializeAddress(Target, Ctx, Reg, Addend); } bool isTB(const MCInst &Inst) const { diff --git a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp new file mode 100644 index 0000000000000..d08bca6e0fc3e --- /dev/null +++ b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp @@ -0,0 +1,96 @@ +//===- bolt/Target/AArch64/AArch64MCSymbolizer.cpp ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AArch64MCSymbolizer.h" +#include "bolt/Core/BinaryContext.h" +#include "bolt/Core/BinaryFunction.h" +#include "bolt/Core/MCPlusBuilder.h" +#include "bolt/Core/Relocation.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "bolt-symbolizer" + +namespace llvm { +namespace bolt { + +AArch64MCSymbolizer::~AArch64MCSymbolizer() {} + +bool AArch64MCSymbolizer::tryAddingSymbolicOperand( + MCInst &Inst, raw_ostream &CStream, int64_t Value, uint64_t InstAddress, + bool IsBranch, uint64_t ImmOffset, uint64_t ImmSize, uint64_t InstSize) { + BinaryContext &BC = Function.getBinaryContext(); + MCContext *Ctx = BC.Ctx.get(); + + // NOTE: the caller may incorrectly set IsBranch.
+ if (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst)) + return false; + + const uint64_t InstOffset = InstAddress - Function.getAddress(); + const Relocation *Relocation = Function.getRelocationAt(InstOffset); + + /// Add symbolic operand to the instruction with an optional addend. + auto addOperand = [&](const MCSymbol *Symbol, uint64_t Addend, + uint64_t RelType) { + const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, *Ctx); + if (Addend) + Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Addend, *Ctx), + *Ctx); + Inst.addOperand(MCOperand::createExpr( + BC.MIB->getTargetExprFor(Inst, Expr, *Ctx, RelType))); + }; + + // The linker can convert ADRP+ADD and ADRP+LDR instruction sequences into + // NOP+ADR. After the conversion, the linker might keep the relocations and + // if we try to symbolize ADR's operand using outdated relocations, we might + // get unexpected results. Hence, we check for the conversion/relaxation, and + // ignore the relocation. The symbolization is done based on the PC-relative + // value of the operand instead. 
+ if (Relocation && BC.MIB->isADR(Inst)) { + if (Relocation->Type == ELF::R_AARCH64_ADD_ABS_LO12_NC || + Relocation->Type == ELF::R_AARCH64_LD64_GOT_LO12_NC) { + LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation at 0x" + << Twine::utohexstr(InstAddress) << '\n'); + Relocation = nullptr; + } + } + + if (Relocation) { + addOperand(Relocation->Symbol, Relocation->Addend, Relocation->Type); + return true; + } + + if (!BC.MIB->hasPCRelOperand(Inst)) + return false; + + Value += InstAddress; + const MCSymbol *TargetSymbol; + uint64_t TargetOffset; + if (!CreateNewSymbols) { + if (BinaryData *BD = BC.getBinaryDataContainingAddress(Value)) { + TargetSymbol = BD->getSymbol(); + TargetOffset = Value - BD->getAddress(); + } else { + return false; + } + } else { + std::tie(TargetSymbol, TargetOffset) = + BC.handleAddressRef(Value, Function, /*IsPCRel*/ true); + } + + addOperand(TargetSymbol, TargetOffset, 0); + + return true; +} + +void AArch64MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream, + int64_t Value, + uint64_t Address) {} + +} // namespace bolt +} // namespace llvm diff --git a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h new file mode 100644 index 0000000000000..56ba4fbcaf275 --- /dev/null +++ b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h @@ -0,0 +1,44 @@ +//===- bolt/Target/AArch64/AArch64MCSymbolizer.h ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef BOLT_TARGET_AARCH64_AARCH64MCSYMBOLIZER_H +#define BOLT_TARGET_AARCH64_AARCH64MCSYMBOLIZER_H + +#include "bolt/Core/BinaryFunction.h" +#include "llvm/MC/MCDisassembler/MCSymbolizer.h" + +namespace llvm { +namespace bolt { + +class AArch64MCSymbolizer : public MCSymbolizer { +protected: + BinaryFunction &Function; + bool CreateNewSymbols{true}; + +public: + AArch64MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true) + : MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr), + Function(Function), CreateNewSymbols(CreateNewSymbols) {} + + AArch64MCSymbolizer(const AArch64MCSymbolizer &) = delete; + AArch64MCSymbolizer &operator=(const AArch64MCSymbolizer &) = delete; + virtual ~AArch64MCSymbolizer(); + + bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &CStream, + int64_t Value, uint64_t Address, bool IsBranch, + uint64_t Offset, uint64_t OpSize, + uint64_t InstSize) override; + + void tryAddingPcLoadReferenceComment(raw_ostream &CStream, int64_t Value, + uint64_t Address) override; +}; + +} // namespace bolt +} // namespace llvm + +#endif diff --git a/bolt/lib/Target/AArch64/CMakeLists.txt b/bolt/lib/Target/AArch64/CMakeLists.txt index 8435ea7245e7e..cb38117de659e 100644 --- a/bolt/lib/Target/AArch64/CMakeLists.txt +++ b/bolt/lib/Target/AArch64/CMakeLists.txt @@ -1,5 +1,6 @@ set(LLVM_LINK_COMPONENTS MC + MCDisassembler Support AArch64Desc ) @@ -18,6 +19,7 @@ endif() add_llvm_library(LLVMBOLTTargetAArch64 AArch64MCPlusBuilder.cpp + AArch64MCSymbolizer.cpp NO_EXPORT DISABLE_LLVM_LINK_LLVM_DYLIB diff --git a/bolt/runtime/hugify.cpp b/bolt/runtime/hugify.cpp index 05c1be4f2d70c..a89cba2243c48 100644 --- a/bolt/runtime/hugify.cpp +++ b/bolt/runtime/hugify.cpp @@ -85,7 +85,8 @@ static bool hasPagecacheTHPSupport() { KernelVersionTy KernelVersion; getKernelVersion((uint32_t 
*)&KernelVersion); - if (KernelVersion.major >= 5 && KernelVersion.minor >= 10) + if (KernelVersion.major >= 6 || + (KernelVersion.major == 5 && KernelVersion.minor >= 10)) return true; return false; diff --git a/bolt/test/X86/Inputs/define_bar.s b/bolt/test/X86/Inputs/define_bar.s index 8c5fb43f25007..0b24e5da147eb 100644 --- a/bolt/test/X86/Inputs/define_bar.s +++ b/bolt/test/X86/Inputs/define_bar.s @@ -1,5 +1,5 @@ # Mocks a vtable object weak def in the C++ stdlib. - .data.rel.ro + .section .data.rel.ro,"aw" .weak bar .type bar, %object bar: diff --git a/bolt/test/X86/cfg-discontinuity-reporting.test b/bolt/test/X86/cfg-discontinuity-reporting.test deleted file mode 100644 index 4d7d3305cdb75..0000000000000 --- a/bolt/test/X86/cfg-discontinuity-reporting.test +++ /dev/null @@ -1,4 +0,0 @@ -## Check profile discontinuity reporting -RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe -RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt | FileCheck %s -CHECK: among the hottest 5 functions top 5% function CFG discontinuity is 100.00% diff --git a/bolt/test/X86/profile-quality-reporting.test b/bolt/test/X86/profile-quality-reporting.test new file mode 100644 index 0000000000000..2e15a6b245afa --- /dev/null +++ b/bolt/test/X86/profile-quality-reporting.test @@ -0,0 +1,4 @@ +## Check profile quality stats reporting +RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe +RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt | FileCheck %s +CHECK: profile quality metrics for the hottest 5 functions (reporting top 5% values): function CFG discontinuity 100.00%; call graph flow conservation gap 60.00%; CFG flow conservation gap 45.53% (weighted) 96.87% (worst) diff --git a/bolt/tools/bat-dump/bat-dump.cpp b/bolt/tools/bat-dump/bat-dump.cpp index 709eb076bca2d..8a743cba17540 100644 --- a/bolt/tools/bat-dump/bat-dump.cpp +++ b/bolt/tools/bat-dump/bat-dump.cpp @@ -39,7 +39,7 @@ using namespace bolt; namespace opts { -cl::OptionCategory 
BatDumpCategory("BAT dump options"); +static cl::OptionCategory BatDumpCategory("BAT dump options"); static cl::OptionCategory *BatDumpCategories[] = {&BatDumpCategory}; diff --git a/bolt/tools/driver/llvm-bolt.cpp b/bolt/tools/driver/llvm-bolt.cpp index 6b6714723fa3b..b9836c2397b6b 100644 --- a/bolt/tools/driver/llvm-bolt.cpp +++ b/bolt/tools/driver/llvm-bolt.cpp @@ -63,7 +63,7 @@ BoltProfile("b", cl::aliasopt(InputDataFilename), cl::cat(BoltCategory)); -cl::opt +static cl::opt LogFile("log-file", cl::desc("redirect journaling to a file instead of stdout/stderr"), cl::Hidden, cl::cat(BoltCategory)); diff --git a/bolt/tools/merge-fdata/merge-fdata.cpp b/bolt/tools/merge-fdata/merge-fdata.cpp index 74a5f8ca2d477..864aa67474199 100644 --- a/bolt/tools/merge-fdata/merge-fdata.cpp +++ b/bolt/tools/merge-fdata/merge-fdata.cpp @@ -31,7 +31,7 @@ using namespace llvm::yaml::bolt; namespace opts { -cl::OptionCategory MergeFdataCategory("merge-fdata options"); +static cl::OptionCategory MergeFdataCategory("merge-fdata options"); enum SortType : char { ST_NONE, diff --git a/bolt/utils/docker/Dockerfile b/bolt/utils/docker/Dockerfile index c2108f7aec53c..c58e1a533df94 100644 --- a/bolt/utils/docker/Dockerfile +++ b/bolt/utils/docker/Dockerfile @@ -23,8 +23,7 @@ RUN mkdir build && \ -DCMAKE_EXE_LINKER_FLAGS="-Wl,--push-state -Wl,-whole-archive -ljemalloc_pic -Wl,--pop-state -lpthread -lstdc++ -lm -ldl" \ -DCMAKE_INSTALL_PREFIX=/home/bolt/install && \ ninja check-bolt && \ - ninja install-llvm-bolt install-perf2bolt install-merge-fdata \ - install-llvm-boltdiff install-bolt_rt + ninja install-llvm-bolt install-merge-fdata install-bolt_rt FROM ubuntu:24.04 diff --git a/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.cpp b/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.cpp index 5d74907aa9fab..d5c5fa3364d63 100644 --- a/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.cpp @@ -184,8 +184,16 @@ 
void UnusedUsingDeclsCheck::check(const MatchFinder::MatchResult &Result) { return; } // Check user-defined literals - if (const auto *UDL = Result.Nodes.getNodeAs("used")) - removeFromFoundDecls(UDL->getCalleeDecl()); + if (const auto *UDL = Result.Nodes.getNodeAs("used")) { + const Decl *CalleeDecl = UDL->getCalleeDecl(); + if (const auto *FD = dyn_cast(CalleeDecl)) { + if (const FunctionTemplateDecl *FPT = FD->getPrimaryTemplate()) { + removeFromFoundDecls(FPT); + return; + } + } + removeFromFoundDecls(CalleeDecl); + } } void UnusedUsingDeclsCheck::removeFromFoundDecls(const Decl *D) { diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index 52be15d3da936..49a97da2bfa42 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -460,18 +460,24 @@ void ClangdServer::codeComplete(PathRef File, Position Pos, CodeCompleteResult Result = clangd::codeComplete( File, Pos, IP->Preamble, ParseInput, CodeCompleteOpts, SpecFuzzyFind ? &*SpecFuzzyFind : nullptr); + // We don't want `codeComplete` to wait for the async call if it doesn't use + // the result (e.g. non-index completion, speculation fails), so that `CB` + // is called as soon as results are available. { clang::clangd::trace::Span Tracer("Completion results callback"); CB(std::move(Result)); } - if (SpecFuzzyFind && SpecFuzzyFind->NewReq) { + if (!SpecFuzzyFind) + return; + if (SpecFuzzyFind->NewReq) { std::lock_guard Lock(CachedCompletionFuzzyFindRequestMutex); CachedCompletionFuzzyFindRequestByFile[File] = *SpecFuzzyFind->NewReq; } - // SpecFuzzyFind is only destroyed after speculative fuzzy find finishes. - // We don't want `codeComplete` to wait for the async call if it doesn't use - // the result (e.g. non-index completion, speculation fails), so that `CB` - // is called as soon as results are available. 
+ // Explicitly block until async task completes, this is fine as we've + // already provided reply to the client and running as a preamble task + // (i.e. won't block other preamble tasks). + if (SpecFuzzyFind->Result.valid()) + SpecFuzzyFind->Result.wait(); }; // We use a potentially-stale preamble because latency is critical here. diff --git a/clang-tools-extra/clangd/CodeComplete.h b/clang-tools-extra/clangd/CodeComplete.h index bb2ebd9478645..cd41f04e4fb5c 100644 --- a/clang-tools-extra/clangd/CodeComplete.h +++ b/clang-tools-extra/clangd/CodeComplete.h @@ -274,7 +274,6 @@ struct SpeculativeFuzzyFind { /// Set by `codeComplete()`. This can be used by callers to update cache. std::optional NewReq; /// The result is consumed by `codeComplete()` if speculation succeeded. - /// NOTE: the destructor will wait for the async call to finish. std::future> Result; }; diff --git a/clang-tools-extra/clangd/TidyProvider.cpp b/clang-tools-extra/clangd/TidyProvider.cpp index 2ac123246a4cb..1d79a7a7399ec 100644 --- a/clang-tools-extra/clangd/TidyProvider.cpp +++ b/clang-tools-extra/clangd/TidyProvider.cpp @@ -210,6 +210,7 @@ TidyProvider disableUnusableChecks(llvm::ArrayRef ExtraBadChecks) { // Check relies on seeing ifndef/define/endif directives, // clangd doesn't replay those when using a preamble. "-llvm-header-guard", "-modernize-macro-to-enum", + "-cppcoreguidelines-macro-to-enum", // ----- Crashing Checks ----- diff --git a/clang-tools-extra/clangd/refactor/tweaks/AnnotateHighlightings.cpp b/clang-tools-extra/clangd/refactor/tweaks/AnnotateHighlightings.cpp index 3a320260238b6..5432b1ef37ec2 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/AnnotateHighlightings.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/AnnotateHighlightings.cpp @@ -51,7 +51,8 @@ Expected AnnotateHighlightings::apply(const Selection &Inputs) { *Inputs.AST, /*IncludeInactiveRegionTokens=*/true); } else { // Store the existing scopes. 
- const auto &BackupScopes = Inputs.AST->getASTContext().getTraversalScope(); + const std::vector BackupScopes = + Inputs.AST->getASTContext().getTraversalScope(); // Narrow the traversal scope to the selected node. Inputs.AST->getASTContext().setTraversalScope( {const_cast(CommonDecl)}); diff --git a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp index 7a47d6ebebf3b..f9ff6f21009f3 100644 --- a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp +++ b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp @@ -823,6 +823,21 @@ TEST(DiagnosticTest, ClangTidyNoLiteralDataInMacroToken) { EXPECT_THAT(TU.build().getDiagnostics(), UnorderedElementsAre()); // no-crash } +TEST(DiagnosticTest, ClangTidyMacroToEnumCheck) { + Annotations Main(R"cpp( + #if 1 + auto foo(); + #endif + )cpp"); + TestTU TU = TestTU::withCode(Main.code()); + std::vector Providers; + Providers.push_back( + addTidyChecks("cppcoreguidelines-macro-to-enum,modernize-macro-to-enum")); + Providers.push_back(disableUnusableChecks()); + TU.ClangTidyProvider = combine(std::move(Providers)); + EXPECT_THAT(TU.build().getDiagnostics(), UnorderedElementsAre()); // no-crash +} + TEST(DiagnosticTest, ElseAfterReturnRange) { Annotations Main(R"cpp( int foo(int cond) { diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index a8d17d19fda1d..ce1418a2a7d58 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -112,7 +112,8 @@ Changes in existing checks ` fixing false positives from smart pointer accessors repeated in checking ``has_value`` and accessing ``value``. The option `IgnoreSmartPointerDereference` should - no longer be needed and will be removed. + no longer be needed and will be removed. Also fixing false positive from + const reference accessors to objects containing optional member. 
- Improved :doc:`bugprone-unsafe-functions ` check to allow specifying @@ -121,7 +122,8 @@ Changes in existing checks - Improved :doc:`misc-const-correctness ` check by adding the option `AllowedTypes`, that excludes specified types from const-correctness - checking. + checking and fixing false positives when modifying variant by ``operator[]`` + with template in parameters. - Improved :doc:`misc-redundant-expression ` check by providing additional @@ -136,6 +138,10 @@ Changes in existing checks ` check by fixing false negatives on ternary operators calling ``std::move``. +- Improved :doc:`misc-unused-using-decls + ` check by fixing false positives + on ``operator""`` with template parameters. + Removed checks ^^^^^^^^^^^^^^ diff --git a/clang-tools-extra/docs/clang-tidy/Contributing.rst b/clang-tools-extra/docs/clang-tidy/Contributing.rst index 4f1df8d114444..9611c655886f2 100644 --- a/clang-tools-extra/docs/clang-tidy/Contributing.rst +++ b/clang-tools-extra/docs/clang-tidy/Contributing.rst @@ -149,6 +149,9 @@ After choosing the module and the name for the check, run the ``clang-tidy/add_new_check.py`` script to create the skeleton of the check and plug it to :program:`clang-tidy`. It's the recommended way of adding new checks. +By default, the new check will apply only to C++ code. If it should apply under +different language options, use the ``--language`` script's parameter. + If we want to create a `readability-awesome-function-names`, we would run: .. code-block:: console @@ -171,9 +174,7 @@ Let's see in more detail at the check class definition: #include "../ClangTidyCheck.h" - namespace clang { - namespace tidy { - namespace readability { + namespace clang::tidy::readability { ... 
class AwesomeFunctionNamesCheck : public ClangTidyCheck { @@ -182,11 +183,12 @@ Let's see in more detail at the check class definition: : ClangTidyCheck(Name, Context) {} void registerMatchers(ast_matchers::MatchFinder *Finder) override; void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { + return LangOpts.CPlusPlus; + } }; - } // namespace readability - } // namespace tidy - } // namespace clang + } // namespace clang::tidy::readability ... @@ -231,9 +233,6 @@ override the method ``registerPPCallbacks``. The ``add_new_check.py`` script does not generate an override for this method in the starting point for your new check. -If your check applies only under a specific set of language options, be sure -to override the method ``isLanguageVersionSupported`` to reflect that. - Check development tips ---------------------- diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-values.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-values.cpp index 5efb64bca2374..654deead4efc8 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-values.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-values.cpp @@ -998,3 +998,11 @@ void member_pointer_const(Value &x, PointerToConstMemberFunction m) { // CHECK-MESSAGES:[[@LINE-1]]:3: warning: variable 'member_pointer_tmp' of type 'Value &' can be declared 'const' (member_pointer_tmp.*m)(); } + +namespace gh127776_false_positive { +template struct vector { T &operator[](int t); }; +template void f() { + vector x; + x[T{}] = 3; +} +} // namespace gh127776_false_positive diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/unused-using-decls.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/unused-using-decls.cpp index 12fc18f340f21..62aa17b0b1c22 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/misc/unused-using-decls.cpp 
+++ b/clang-tools-extra/test/clang-tidy/checkers/misc/unused-using-decls.cpp @@ -222,3 +222,19 @@ using gh69714::StructGH69714_1; using gh69714::StructGH69714_2; struct StructGH69714_1 a; struct StructGH69714_2 *b; + +namespace gh53444 { +namespace my_literals { + template + int operator""_r() { + return {}; + } +} + +using my_literals::operator"" _r; + +int foo() { + auto x2 = 123_r; +} + +} diff --git a/clang/bindings/python/clang/cindex.py b/clang/bindings/python/clang/cindex.py index 722562220eeea..879a0a3c5c58c 100644 --- a/clang/bindings/python/clang/cindex.py +++ b/clang/bindings/python/clang/cindex.py @@ -2713,6 +2713,21 @@ def visitor(base, children): conf.lib.clang_visitCXXBaseClasses(self, fields_visit_callback(visitor), bases) return iter(bases) + def get_methods(self): + """Return an iterator for accessing the methods of this type.""" + + def visitor(method, children): + assert method != conf.lib.clang_getNullCursor() + + # Create reference to TU so it isn't GC'd before Cursor. 
+ method._tu = self._tu + methods.append(method) + return 1 # continue + + methods: list[Cursor] = [] + conf.lib.clang_visitCXXMethods(self, fields_visit_callback(visitor), methods) + return iter(methods) + def get_exception_specification_kind(self): """ Return the kind of the exception specification; a value from @@ -4020,6 +4035,7 @@ def set_property(self, property, value): ), ("clang_visitChildren", [Cursor, cursor_visit_callback, py_object], c_uint), ("clang_visitCXXBaseClasses", [Type, fields_visit_callback, py_object], c_uint), + ("clang_visitCXXMethods", [Type, fields_visit_callback, py_object], c_uint), ("clang_Cursor_getNumArguments", [Cursor], c_int), ("clang_Cursor_getArgument", [Cursor, c_uint], Cursor), ("clang_Cursor_getNumTemplateArguments", [Cursor], c_int), diff --git a/clang/bindings/python/tests/cindex/test_type.py b/clang/bindings/python/tests/cindex/test_type.py index 9bac33f3041f4..bc893d509524e 100644 --- a/clang/bindings/python/tests/cindex/test_type.py +++ b/clang/bindings/python/tests/cindex/test_type.py @@ -559,3 +559,21 @@ class Template : public A, public B, virtual C { self.assertEqual(bases[1].get_base_offsetof(cursor_type_decl), 96) self.assertTrue(bases[2].is_virtual_base()) self.assertEqual(bases[2].get_base_offsetof(cursor_type_decl), 128) + + def test_class_methods(self): + source = """ + template + class Template { void Foo(); }; + typedef Template instance; + instance bar; + """ + tu = get_tu(source, lang="cpp", flags=["--target=x86_64-linux-gnu"]) + cursor = get_cursor(tu, "instance") + cursor_type = cursor.underlying_typedef_type + self.assertEqual(cursor.kind, CursorKind.TYPEDEF_DECL) + methods = list(cursor_type.get_methods()) + self.assertEqual(len(methods), 4) + self.assertEqual(methods[0].kind, CursorKind.CXX_METHOD) + self.assertEqual(methods[1].kind, CursorKind.CONSTRUCTOR) + self.assertEqual(methods[2].kind, CursorKind.CONSTRUCTOR) + self.assertEqual(methods[3].kind, CursorKind.CONSTRUCTOR) diff --git 
a/clang/cmake/caches/BOLT.cmake b/clang/cmake/caches/BOLT.cmake index eba2346b2f4ca..1956c10463148 100644 --- a/clang/cmake/caches/BOLT.cmake +++ b/clang/cmake/caches/BOLT.cmake @@ -1,6 +1,7 @@ set(CMAKE_BUILD_TYPE Release CACHE STRING "") set(CLANG_BOLT "INSTRUMENT" CACHE STRING "") set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "") +set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "") set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "") set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "") diff --git a/clang/docs/CommandGuide/clang.rst b/clang/docs/CommandGuide/clang.rst index ca8176f854729..f0d94a4e628b0 100644 --- a/clang/docs/CommandGuide/clang.rst +++ b/clang/docs/CommandGuide/clang.rst @@ -733,16 +733,19 @@ ENVIRONMENT .. envvar:: CPATH - If this environment variable is present, it is treated as a delimited list of - paths to be added to the default system include path list. The delimiter is - the platform dependent delimiter, as used in the PATH environment variable. - - Empty components in the environment variable are ignored. + This environment variable specifies additional (non-system) header search + paths to be used to find included header files. These paths are searched after + paths specified with the :option:`-I\` option, but before any + system header search paths. Paths are delimited by the platform dependent + delimiter as used in the ``PATH`` environment variable. Empty entries in the + delimited path list, including those at the beginning or end of the list, are + treated as specifying the compiler's current working directory. .. envvar:: C_INCLUDE_PATH, OBJC_INCLUDE_PATH, CPLUS_INCLUDE_PATH, OBJCPLUS_INCLUDE_PATH - These environment variables specify additional paths, as for :envvar:`CPATH`, which are - only used when processing the appropriate language. + These environment variables specify additional system header file search + paths to be used when processing the corresponding language. 
Search paths are + delimited as for the :envvar:`CPATH` environment variable. .. envvar:: MACOSX_DEPLOYMENT_TARGET diff --git a/clang/docs/HIPSupport.rst b/clang/docs/HIPSupport.rst index 481ed39230813..8f473c21e1918 100644 --- a/clang/docs/HIPSupport.rst +++ b/clang/docs/HIPSupport.rst @@ -286,6 +286,26 @@ Example Usage basePtr->virtualFunction(); // Allowed since obj is constructed in device code } +Host and Device Attributes of Default Destructors +=================================================== + +If a default destructor does not have explicit host or device attributes, +clang infers these attributes based on the destructors of its data members +and base classes. If any conflicts are detected among these destructors, +clang diagnoses the issue. Otherwise, clang adds an implicit host or device +attribute according to whether the data members' and base classes' +destructors can execute on the host or device side. + +For explicit template classes with virtual destructors, which must be emitted, +the inference adopts a conservative approach. In this case, implicit host or +device attributes from member and base class destructors are ignored. This +precaution is necessary because, although a constexpr destructor carries +implicit host or device attributes, a constexpr function may call a +non-constexpr function, which is by default a host function. + +Users can override the inferred host and device attributes of default +destructors by adding explicit host and device attributes to them. + C++ Standard Parallelism Offload Support: Compiler And Runtime ============================================================== diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 2b72143482943..28856c27317f3 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -35,6 +35,9 @@ Potentially Breaking Changes ============================ - The Objective-C ARC migrator (ARCMigrate) has been removed.
+- Fix missing diagnostics for uses of declarations when performing typename access, + such as when performing member access on a '[[deprecated]]' type alias. + (#GH58547) C/C++ Language Potentially Breaking Changes ------------------------------------------- @@ -59,6 +62,9 @@ AST Dumping Potentially Breaking Changes Clang Frontend Potentially Breaking Changes ------------------------------------------- +- The ``-Wglobal-constructors`` flag now applies to ``[[gnu::constructor]]`` and + ``[[gnu::destructor]]`` attributes. + Clang Python Bindings Potentially Breaking Changes -------------------------------------------------- @@ -93,6 +99,9 @@ Resolutions to C++ Defect Reports - Implemented `CWG2918 Consideration of constraints for address of overloaded ` `function `_ +- Bumped the ``__cpp_constexpr`` feature-test macro to ``202002L`` in C++20 mode as indicated in + `P2493R0 `_. + C Language Changes ------------------ @@ -114,6 +123,8 @@ Non-comprehensive list of changes in this release New Compiler Flags ------------------ +- New option ``-Wundef-true`` added and enabled by default to warn when `true` is used in the C preprocessor without being defined before C23. + - New option ``-fprofile-continuous`` added to enable continuous profile syncing to file (#GH124353, `docs `_). The feature has `existed `_) for a while and this is just a user facing option. @@ -124,6 +135,10 @@ Deprecated Compiler Flags Modified Compiler Flags ----------------------- +- The ARM AArch32 ``-mtp`` option accepts and defaults to ``auto``. A value of ``auto`` uses the best available method of providing the thread pointer supported by the hardware. This matches + the behavior of ``-mtp`` in gcc. This changes the default behavior for ARM targets that provide the ``TPIDRURO`` register as this will be used instead of a call to the ``__aeabi_read_tp``. + Programs that use ``__aeabi_read_tp`` but do not use the ``TPIDRURO`` register must use ``-mtp=soft``.
Fixes #123864 + Removed Compiler Flags ------------------------- @@ -138,6 +153,9 @@ related warnings within the method body. ``__attribute__((model("large")))`` on non-TLS globals in x86-64 compilations. This forces the global to be considered small or large in regards to the x86-64 code model, regardless of the code model specified for the compilation. +- Clang now emits a warning ``-Wreserved-init-priority`` instead of a hard error + when ``__attribute__((init_priority(n)))`` is used with values of n in the + reserved range [0, 100]. The warning will be treated as an error by default. - There is a new ``format_matches`` attribute to complement the existing ``format`` attribute. ``format_matches`` allows the compiler to verify that @@ -159,7 +177,7 @@ related warnings within the method body. print_status("%s (%#08x)\n"); // order of %s and %x is swapped but there is no diagnostic } - + Before the introducion of ``format_matches``, this code cannot be verified at compile-time. ``format_matches`` plugs that hole: @@ -207,6 +225,7 @@ Improvements to Clang's diagnostics under the subgroup ``-Wunsafe-buffer-usage-in-libc-call``. - Diagnostics on chained comparisons (``a < b < c``) are now an error by default. This can be disabled with ``-Wno-error=parentheses``. +- The ``-Wshift-bool`` warning has been added to warn about shifting a boolean. (#GH28334) - The :doc:`ThreadSafetyAnalysis` now supports ``-Wthread-safety-pointer``, which enables warning on passing or returning pointers to guarded variables @@ -214,6 +233,10 @@ Improvements to Clang's diagnostics :doc:`ThreadSafetyAnalysis` still does not perform alias analysis. The feature will be default-enabled with ``-Wthread-safety`` in a future release. +- Improve the diagnostics for chained comparisons to report actual expressions and operators (#GH129069). + +- Improve the diagnostics for shadows template parameter to report correct location (#GH129060). 
+ Improvements to Clang's time-trace ---------------------------------- @@ -225,6 +248,16 @@ Bug Fixes in This Version - Clang now outputs correct values when #embed data contains bytes with negative signed char values (#GH102798). +- Fixed rejects-valid problem when #embed appears in std::initializer_list or + when it can affect template argument deduction (#GH122306). +- Fix crash on code completion of function calls involving partial order of function templates + (#GH125500). +- Fixed clang crash when #embed data does not fit into an array + (#GH128987). +- Non-local variable and non-variable declarations in the first clause of a ``for`` loop in C are no longer incorrectly + considered an error in C23 mode and are allowed as an extension in earlier language modes. + +- Remove the ``static`` specifier for the value of ``_FUNCTION_`` for static functions, in MSVC compatibility mode. Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -246,13 +279,27 @@ Bug Fixes to C++ Support - Clang is now better at keeping track of friend function template instance contexts. (#GH55509) - Clang now prints the correct instantiation context for diagnostics suppressed by template argument deduction. +- Clang is now better at instantiating the function definition after its use inside + of a constexpr lambda. (#GH125747) - The initialization kind of elements of structured bindings direct-list-initialized from an array is corrected to direct-initialization. - Clang no longer crashes when a coroutine is declared ``[[noreturn]]``. (#GH127327) +- Clang now uses the parameter location for abbreviated function templates in ``extern "C"``. (#GH46386) + +Improvements to C++ diagnostics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Clang now more consistently adds a note pointing to the relevant template + parameter. Some diagnostics are reworded to better take advantage of this. 
+- Template Template Parameter diagnostics now stop referring to template + parameters as template arguments, in some circumstances, better hiding + from the users template template parameter partial ordering arcana. + Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ - Fixed type checking when a statement expression ends in an l-value of atomic type. (#GH106576) +- Fixed uninitialized use check in a lambda within CXXOperatorCallExpr. (#GH129198) Miscellaneous Bug Fixes ^^^^^^^^^^^^^^^^^^^^^^^ @@ -296,6 +343,8 @@ Android Support Windows Support ^^^^^^^^^^^^^^^ +- Clang now supports MSVC vector deleting destructors (GH19772). + LoongArch Support ^^^^^^^^^^^^^^^^^ @@ -334,6 +383,8 @@ Fixed Point Support in Clang AST Matchers ------------ +- Ensure ``isDerivedFrom`` matches the correct base in case more than one alias exists. + clang-format ------------ @@ -347,6 +398,8 @@ clang-format libclang -------- +- Added ``clang_visitCXXMethods``, which allows visiting the methods + of a class. - Fixed a buffer overflow in ``CXString`` implementation. The fix may result in increased memory allocation. @@ -384,8 +437,12 @@ Moved checkers Sanitizers ---------- +- ``-fsanitize=vptr`` is no longer a part of ``-fsanitize=undefined``. + Python Binding Changes ---------------------- +- Added ``Type.get_methods``, a binding for ``clang_visitCXXMethods``, which + allows visiting the methods of a class. OpenMP Support -------------- diff --git a/clang/docs/UndefinedBehaviorSanitizer.rst b/clang/docs/UndefinedBehaviorSanitizer.rst index c4895fb9722bf..0a2d833783e57 100644 --- a/clang/docs/UndefinedBehaviorSanitizer.rst +++ b/clang/docs/UndefinedBehaviorSanitizer.rst @@ -214,13 +214,14 @@ Available checks are: the wrong dynamic type, or that its lifetime has not begun or has ended. Incompatible with ``-fno-rtti``. Link must be performed by ``clang++``, not ``clang``, to make sure C++-specific parts of the runtime library and C++ - standard libraries are present. 
+ standard libraries are present. The check is not a part of the ``undefined`` + group. Also it does not support ``-fsanitize-trap=vptr``. You can also use the following check groups: - ``-fsanitize=undefined``: All of the checks listed above other than ``float-divide-by-zero``, ``unsigned-integer-overflow``, - ``implicit-conversion``, ``local-bounds`` and the ``nullability-*`` group - of checks. + ``implicit-conversion``, ``local-bounds``, ``vptr`` and the + ``nullability-*`` group of checks. - ``-fsanitize=undefined-trap``: Deprecated alias of ``-fsanitize=undefined``. - ``-fsanitize=implicit-integer-truncation``: Catches lossy integral diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index c1eedb33e74d2..b817a99a1c56f 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -118,19 +118,6 @@ core.NullDereference (C, C++, ObjC) """"""""""""""""""""""""""""""""""" Check for dereferences of null pointers. -This checker specifically does -not report null pointer dereferences for x86 and x86-64 targets when the -address space is 256 (x86 GS Segment), 257 (x86 FS Segment), or 258 (x86 SS -segment). See `X86/X86-64 Language Extensions -`__ -for reference. - -The ``SuppressAddressSpaces`` option suppresses -warnings for null dereferences of all pointers with address spaces. You can -disable this behavior with the option -``-analyzer-config core.NullDereference:SuppressAddressSpaces=false``. -*Defaults to true*. - .. code-block:: objc // C @@ -170,6 +157,19 @@ disable this behavior with the option obj->x = 1; // warn } +Null pointer dereferences of pointers with address spaces are not always defined +as error. Specifically on x86/x86-64 target if the pointer address space is +256 (x86 GS Segment), 257 (x86 FS Segment), or 258 (x86 SS Segment), a null +dereference is not defined as error. See `X86/X86-64 Language Extensions +`__ +for reference. 
+ +If the analyzer option ``suppress-dereferences-from-any-address-space`` is set +to true (the default value), then this checker never reports dereference of +pointers with a specified address space. If the option is set to false, then +reports from the specific x86 address spaces 256, 257 and 258 are still +suppressed, but null dereferences from other address spaces are reported. + .. _core-StackAddressEscape: core.StackAddressEscape (C) @@ -2919,6 +2919,41 @@ Check for assignment of a fixed address to a pointer. p = (int *) 0x10000; // warn } +.. _alpha-core-FixedAddressDereference: + +alpha.core.FixedAddressDereference (C, C++, ObjC) +""""""""""""""""""""""""""""""""""""""""""""""""" +Check for dereferences of fixed addresses. + +A pointer contains a fixed address if it was set to a hard-coded value or it +becomes otherwise obvious that at that point it can have only a single specific +value. + +.. code-block:: c + + void test1() { + int *p = (int *)0x020; + int x = p[0]; // warn + } + + void test2(int *p) { + if (p == (int *)-1) + *p = 0; // warn + } + + void test3() { + int (*p_function)(char, char); + p_function = (int (*)(char, char))0x04080; + int x = (*p_function)('x', 'y'); // NO warning yet at function pointer calls + } + +If the analyzer option ``suppress-dereferences-from-any-address-space`` is set +to true (the default value), then this checker never reports dereference of +pointers with a specified address space. If the option is set to false, then +reports from the specific x86 address spaces 256, 257 and 258 are still +suppressed, but fixed address dereferences from other address spaces are +reported. + ..
_alpha-core-PointerArithm: alpha.core.PointerArithm (C) diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index ed6bd797684d9..38e2417dcd181 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2214,7 +2214,11 @@ enum CXCursorKind { */ CXCursor_OpenACCAtomicConstruct = 332, - CXCursor_LastStmt = CXCursor_OpenACCAtomicConstruct, + /** OpenACC cache Construct. + */ + CXCursor_OpenACCCacheConstruct = 333, + + CXCursor_LastStmt = CXCursor_OpenACCCacheConstruct, /** * Cursor that represents the translation unit itself. @@ -3061,6 +3065,18 @@ enum CXCallingConv { CXCallingConv_M68kRTD = 19, CXCallingConv_PreserveNone = 20, CXCallingConv_RISCVVectorCall = 21, + CXCallingConv_RISCVVLSCall_32 = 22, + CXCallingConv_RISCVVLSCall_64 = 23, + CXCallingConv_RISCVVLSCall_128 = 24, + CXCallingConv_RISCVVLSCall_256 = 25, + CXCallingConv_RISCVVLSCall_512 = 26, + CXCallingConv_RISCVVLSCall_1024 = 27, + CXCallingConv_RISCVVLSCall_2048 = 28, + CXCallingConv_RISCVVLSCall_4096 = 29, + CXCallingConv_RISCVVLSCall_8192 = 30, + CXCallingConv_RISCVVLSCall_16384 = 31, + CXCallingConv_RISCVVLSCall_32768 = 32, + CXCallingConv_RISCVVLSCall_65536 = 33, CXCallingConv_Invalid = 100, CXCallingConv_Unexposed = 200 @@ -6628,6 +6644,28 @@ CINDEX_LINKAGE unsigned clang_visitCXXBaseClasses(CXType T, CXFieldVisitor visitor, CXClientData client_data); +/** + * Visit the class methods of a type. + * + * This function visits all the methods of the given cursor, + * invoking the given \p visitor function with the cursors of each + * visited method. The traversal may be ended prematurely, if + * the visitor returns \c CXFieldVisit_Break. + * + * \param T The record type whose field may be visited. + * + * \param visitor The visitor function that will be invoked for each + * field of \p T. + * + * \param client_data Pointer data supplied by the client, which will + * be passed to the visitor each time it is invoked. 
+ * + * \returns A non-zero value if the traversal was terminated + * prematurely by the visitor returning \c CXFieldVisit_Break. + */ +CINDEX_LINKAGE unsigned clang_visitCXXMethods(CXType T, CXFieldVisitor visitor, + CXClientData client_data); + /** * Describes the kind of binary operators. */ diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 4c6966c922cc7..f9a12260a6590 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -723,7 +723,7 @@ class ASTContext : public RefCountedBase { // (However they are still accessible via TranslationUnitDecl->decls()) // // Changing the scope clears the parent cache, which is expensive to rebuild. - std::vector getTraversalScope() const { return TraversalScope; } + ArrayRef getTraversalScope() const { return TraversalScope; } void setTraversalScope(const std::vector &); /// Forwards to get node parents from the ParentMapContext. New callers should diff --git a/clang/include/clang/AST/AttrIterator.h b/clang/include/clang/AST/AttrIterator.h index 7e2bb0381d4c8..2f39c144dc160 100644 --- a/clang/include/clang/AST/AttrIterator.h +++ b/clang/include/clang/AST/AttrIterator.h @@ -16,6 +16,7 @@ #include "clang/Basic/LLVM.h" #include "llvm/ADT/ADL.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Casting.h" #include #include @@ -124,6 +125,17 @@ inline auto *getSpecificAttr(const Container &container) { return It != specific_attr_end(container) ? 
*It : nullptr; } +template +inline auto getSpecificAttrs(const Container &container) { + using ValueTy = llvm::detail::ValueOfRange; + using ValuePointeeTy = std::remove_pointer_t; + using IterTy = std::conditional_t, + const SpecificAttr, SpecificAttr>; + auto Begin = specific_attr_begin(container); + auto End = specific_attr_end(container); + return llvm::make_range(Begin, End); +} + } // namespace clang #endif // LLVM_CLANG_AST_ATTRITERATOR_H diff --git a/clang/include/clang/AST/DeclOpenACC.h b/clang/include/clang/AST/DeclOpenACC.h new file mode 100644 index 0000000000000..9e99061ffc47f --- /dev/null +++ b/clang/include/clang/AST/DeclOpenACC.h @@ -0,0 +1,161 @@ +//=- DeclOpenACC.h - Classes for representing OpenACC directives -*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines OpenACC nodes for declarative directives. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_DECLOPENACC_H +#define LLVM_CLANG_AST_DECLOPENACC_H + +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/OpenACCClause.h" +#include "clang/Basic/OpenACCKinds.h" + +namespace clang { + +// A base class for the declaration constructs, which manages the clauses and +// basic source location information. Currently not part of the Decl inheritence +// tree, as we should never have a reason to store one of these. +class OpenACCConstructDecl : public Decl { + friend class ASTDeclReader; + friend class ASTDeclWriter; + // The directive kind, each implementation of this interface is expected to + // handle a specific kind. 
+ OpenACCDirectiveKind DirKind = OpenACCDirectiveKind::Invalid; + SourceLocation DirectiveLoc; + SourceLocation EndLoc; + /// The list of clauses. This is stored here as an ArrayRef, as this is the + /// most convienient place to access the list, however the list itself should + /// be stored in leaf nodes, likely in trailing-storage. + MutableArrayRef Clauses; + +protected: + OpenACCConstructDecl(Kind DeclKind, DeclContext *DC, OpenACCDirectiveKind K, + SourceLocation StartLoc, SourceLocation DirLoc, + SourceLocation EndLoc) + : Decl(DeclKind, DC, StartLoc), DirKind(K), DirectiveLoc(DirLoc), + EndLoc(EndLoc) {} + + OpenACCConstructDecl(Kind DeclKind) : Decl(DeclKind, EmptyShell{}) {} + + void setClauseList(MutableArrayRef NewClauses) { + assert(Clauses.empty() && "Cannot change clause list"); + Clauses = NewClauses; + } + +public: + OpenACCDirectiveKind getDirectiveKind() const { return DirKind; } + SourceLocation getDirectiveLoc() const { return DirectiveLoc; } + virtual SourceRange getSourceRange() const override LLVM_READONLY { + return SourceRange(getLocation(), EndLoc); + } + + ArrayRef clauses() const { return Clauses; } +}; + +class OpenACCDeclareDecl final + : public OpenACCConstructDecl, + private llvm::TrailingObjects { + friend TrailingObjects; + friend class ASTDeclReader; + friend class ASTDeclWriter; + + OpenACCDeclareDecl(unsigned NumClauses) + : OpenACCConstructDecl(OpenACCDeclare) { + std::uninitialized_value_construct( + getTrailingObjects(), + getTrailingObjects() + NumClauses); + setClauseList(MutableArrayRef(getTrailingObjects(), + NumClauses)); + } + + OpenACCDeclareDecl(DeclContext *DC, SourceLocation StartLoc, + SourceLocation DirLoc, SourceLocation EndLoc, + ArrayRef Clauses) + : OpenACCConstructDecl(OpenACCDeclare, DC, OpenACCDirectiveKind::Declare, + StartLoc, DirLoc, EndLoc) { + // Initialize the trailing storage. 
+ std::uninitialized_copy(Clauses.begin(), Clauses.end(), + getTrailingObjects()); + + setClauseList(MutableArrayRef(getTrailingObjects(), + Clauses.size())); + } + +public: + static OpenACCDeclareDecl *Create(ASTContext &Ctx, DeclContext *DC, + SourceLocation StartLoc, + SourceLocation DirLoc, + SourceLocation EndLoc, + ArrayRef Clauses); + static OpenACCDeclareDecl * + CreateDeserialized(ASTContext &Ctx, GlobalDeclID ID, unsigned NumClauses); + static bool classof(const Decl *D) { return classofKind(D->getKind()); } + static bool classofKind(Kind K) { return K == OpenACCDeclare; } +}; + +class OpenACCRoutineDecl final + : public OpenACCConstructDecl, + private llvm::TrailingObjects { + friend TrailingObjects; + friend class ASTDeclReader; + friend class ASTDeclWriter; + + Expr *FuncRef = nullptr; + SourceRange ParensLoc; + + OpenACCRoutineDecl(unsigned NumClauses) + : OpenACCConstructDecl(OpenACCRoutine) { + std::uninitialized_value_construct( + getTrailingObjects(), + getTrailingObjects() + NumClauses); + setClauseList(MutableArrayRef(getTrailingObjects(), + NumClauses)); + } + + OpenACCRoutineDecl(DeclContext *DC, SourceLocation StartLoc, + SourceLocation DirLoc, SourceLocation LParenLoc, + Expr *FuncRef, SourceLocation RParenLoc, + SourceLocation EndLoc, + ArrayRef Clauses) + : OpenACCConstructDecl(OpenACCRoutine, DC, OpenACCDirectiveKind::Routine, + StartLoc, DirLoc, EndLoc), + FuncRef(FuncRef), ParensLoc(LParenLoc, RParenLoc) { + // Initialize the trailing storage. 
+ std::uninitialized_copy(Clauses.begin(), Clauses.end(), + getTrailingObjects()); + setClauseList(MutableArrayRef(getTrailingObjects(), + Clauses.size())); + } + +public: + static OpenACCRoutineDecl * + Create(ASTContext &Ctx, DeclContext *DC, SourceLocation StartLoc, + SourceLocation DirLoc, SourceLocation LParenLoc, Expr *FuncRef, + SourceLocation RParenLoc, SourceLocation EndLoc, + ArrayRef Clauses); + static OpenACCRoutineDecl * + CreateDeserialized(ASTContext &Ctx, GlobalDeclID ID, unsigned NumClauses); + static bool classof(const Decl *D) { return classofKind(D->getKind()); } + static bool classofKind(Kind K) { return K == OpenACCRoutine; } + + const Expr *getFunctionReference() const { return FuncRef; } + + Expr *getFunctionReference() { return FuncRef; } + + SourceLocation getLParenLoc() const { return ParensLoc.getBegin(); } + SourceLocation getRParenLoc() const { return ParensLoc.getEnd(); } + + bool hasNameSpecified() const { return !ParensLoc.getBegin().isInvalid(); } +}; +} // namespace clang + +#endif diff --git a/clang/include/clang/AST/DeclVisitor.h b/clang/include/clang/AST/DeclVisitor.h index 8690cdda4bb70..8b7c30ee4051e 100644 --- a/clang/include/clang/AST/DeclVisitor.h +++ b/clang/include/clang/AST/DeclVisitor.h @@ -18,6 +18,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclFriend.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclOpenACC.h" #include "clang/AST/DeclOpenMP.h" #include "clang/AST/DeclTemplate.h" #include "llvm/ADT/STLExtras.h" diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 0f98d237dcbcd..cfe49acf20b77 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -5189,6 +5189,16 @@ class InitListExpr : public Expr { unsigned getNumInits() const { return InitExprs.size(); } + /// getNumInits but if the list has an EmbedExpr inside includes full length + /// of embedded data. 
+ unsigned getNumInitsWithEmbedExpanded() const { + unsigned Sum = InitExprs.size(); + for (auto *IE : InitExprs) + if (auto *EE = dyn_cast(IE)) + Sum += EE->getDataElementCount() - 1; + return Sum; + } + /// Retrieve the set of initializers. Expr **getInits() { return reinterpret_cast(InitExprs.data()); } diff --git a/clang/include/clang/AST/JSONNodeDumper.h b/clang/include/clang/AST/JSONNodeDumper.h index 660a05435003d..570662b58ccf0 100644 --- a/clang/include/clang/AST/JSONNodeDumper.h +++ b/clang/include/clang/AST/JSONNodeDumper.h @@ -281,6 +281,9 @@ class JSONNodeDumper void VisitObjCPropertyImplDecl(const ObjCPropertyImplDecl *D); void VisitBlockDecl(const BlockDecl *D); + void VisitOpenACCDeclareDecl(const OpenACCDeclareDecl *D); + void VisitOpenACCRoutineDecl(const OpenACCRoutineDecl *D); + void VisitDeclRefExpr(const DeclRefExpr *DRE); void VisitSYCLUniqueStableNameExpr(const SYCLUniqueStableNameExpr *E); void VisitOpenACCAsteriskSizeExpr(const OpenACCAsteriskSizeExpr *E); diff --git a/clang/include/clang/AST/OpenACCClause.h b/clang/include/clang/AST/OpenACCClause.h index f5be54bdada8b..b2cf621bc0a78 100644 --- a/clang/include/clang/AST/OpenACCClause.h +++ b/clang/include/clang/AST/OpenACCClause.h @@ -162,6 +162,26 @@ class OpenACCSeqClause : public OpenACCClause { return const_child_range(const_child_iterator(), const_child_iterator()); } }; +// Represents the 'nohost' clause. 
+class OpenACCNoHostClause : public OpenACCClause { +protected: + OpenACCNoHostClause(SourceLocation BeginLoc, SourceLocation EndLoc) + : OpenACCClause(OpenACCClauseKind::NoHost, BeginLoc, EndLoc) {} + +public: + static bool classof(const OpenACCClause *C) { + return C->getClauseKind() == OpenACCClauseKind::NoHost; + } + static OpenACCNoHostClause * + Create(const ASTContext &Ctx, SourceLocation BeginLoc, SourceLocation EndLoc); + + child_range children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } +}; /// Represents a clause that has a list of parameters. class OpenACCClauseWithParams : public OpenACCClause { @@ -1171,6 +1191,55 @@ class OpenACCReductionClause final OpenACCReductionOperator getReductionOp() const { return Op; } }; +class OpenACCLinkClause final + : public OpenACCClauseWithVarList, + private llvm::TrailingObjects { + friend TrailingObjects; + + OpenACCLinkClause(SourceLocation BeginLoc, SourceLocation LParenLoc, + ArrayRef VarList, SourceLocation EndLoc) + : OpenACCClauseWithVarList(OpenACCClauseKind::Link, BeginLoc, LParenLoc, + EndLoc) { + std::uninitialized_copy(VarList.begin(), VarList.end(), + getTrailingObjects()); + setExprs(MutableArrayRef(getTrailingObjects(), VarList.size())); + } + +public: + static bool classof(const OpenACCClause *C) { + return C->getClauseKind() == OpenACCClauseKind::Link; + } + + static OpenACCLinkClause *Create(const ASTContext &C, SourceLocation BeginLoc, + SourceLocation LParenLoc, + ArrayRef VarList, + SourceLocation EndLoc); +}; + +class OpenACCDeviceResidentClause final + : public OpenACCClauseWithVarList, + private llvm::TrailingObjects { + friend TrailingObjects; + + OpenACCDeviceResidentClause(SourceLocation BeginLoc, SourceLocation LParenLoc, + ArrayRef VarList, SourceLocation EndLoc) + : OpenACCClauseWithVarList(OpenACCClauseKind::DeviceResident, BeginLoc, + LParenLoc, 
EndLoc) { + std::uninitialized_copy(VarList.begin(), VarList.end(), + getTrailingObjects()); + setExprs(MutableArrayRef(getTrailingObjects(), VarList.size())); + } + +public: + static bool classof(const OpenACCClause *C) { + return C->getClauseKind() == OpenACCClauseKind::DeviceResident; + } + + static OpenACCDeviceResidentClause * + Create(const ASTContext &C, SourceLocation BeginLoc, SourceLocation LParenLoc, + ArrayRef VarList, SourceLocation EndLoc); +}; + template class OpenACCClauseVisitor { Impl &getDerived() { return static_cast(*this); } diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 5964cbaec8e44..5ca3e435f033b 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -20,6 +20,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclFriend.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclOpenACC.h" #include "clang/AST/DeclOpenMP.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/DeclarationName.h" @@ -1821,6 +1822,14 @@ DEF_TRAVERSE_DECL(OMPAllocateDecl, { TRY_TO(TraverseOMPClause(C)); }) +DEF_TRAVERSE_DECL(OpenACCDeclareDecl, + { TRY_TO(VisitOpenACCClauseList(D->clauses())); }) + +DEF_TRAVERSE_DECL(OpenACCRoutineDecl, { + TRY_TO(TraverseStmt(D->getFunctionReference())); + TRY_TO(VisitOpenACCClauseList(D->clauses())); +}) + // A helper method for TemplateDecl's children. 
template bool RecursiveASTVisitor::TraverseTemplateParameterListHelper( @@ -4109,6 +4118,10 @@ DEF_TRAVERSE_STMT(OpenACCUpdateConstruct, { TRY_TO(VisitOpenACCClauseList(S->clauses())); }) DEF_TRAVERSE_STMT(OpenACCAtomicConstruct, { TRY_TO(TraverseOpenACCAssociatedStmtConstruct(S)); }) +DEF_TRAVERSE_STMT(OpenACCCacheConstruct, { + for (auto *E : S->getVarList()) + TRY_TO(TraverseStmt(E)); +}) // Traverse HLSL: Out argument expression DEF_TRAVERSE_STMT(HLSLOutArgExpr, {}) diff --git a/clang/include/clang/AST/StmtOpenACC.h b/clang/include/clang/AST/StmtOpenACC.h index bd6c95d342ce2..c2c74f5cf1958 100644 --- a/clang/include/clang/AST/StmtOpenACC.h +++ b/clang/include/clang/AST/StmtOpenACC.h @@ -593,6 +593,81 @@ class OpenACCWaitConstruct final } }; +class OpenACCCacheConstruct final + : public OpenACCConstructStmt, + private llvm::TrailingObjects { + friend TrailingObjects; + friend class ASTStmtWriter; + friend class ASTStmtReader; + // Locations of the left and right parens of the 'var-list' + // expression-list. 
+ SourceRange ParensLoc; + SourceLocation ReadOnlyLoc; + + unsigned NumVars = 0; + + OpenACCCacheConstruct(unsigned NumVars) + : OpenACCConstructStmt(OpenACCCacheConstructClass, + OpenACCDirectiveKind::Cache, SourceLocation{}, + SourceLocation{}, SourceLocation{}), + NumVars(NumVars) { + std::uninitialized_value_construct(getVarListPtr(), + getVarListPtr() + NumVars); + } + OpenACCCacheConstruct(SourceLocation Start, SourceLocation DirectiveLoc, + SourceLocation LParenLoc, SourceLocation ReadOnlyLoc, + ArrayRef VarList, SourceLocation RParenLoc, + SourceLocation End) + : OpenACCConstructStmt(OpenACCCacheConstructClass, + OpenACCDirectiveKind::Cache, Start, DirectiveLoc, + End), + ParensLoc(LParenLoc, RParenLoc), ReadOnlyLoc(ReadOnlyLoc), + NumVars(VarList.size()) { + + std::uninitialized_copy(VarList.begin(), VarList.end(), getVarListPtr()); + } + + Expr **getVarListPtr() const { + return const_cast(getTrailingObjects()); + } + +public: + llvm::ArrayRef getVarList() const { + return llvm::ArrayRef(getVarListPtr(), NumVars); + } + + llvm::ArrayRef getVarList() { + return llvm::ArrayRef(getVarListPtr(), NumVars); + } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == OpenACCCacheConstructClass; + } + + static OpenACCCacheConstruct *CreateEmpty(const ASTContext &C, + unsigned NumVars); + static OpenACCCacheConstruct * + Create(const ASTContext &C, SourceLocation Start, SourceLocation DirectiveLoc, + SourceLocation LParenLoc, SourceLocation ReadOnlyLoc, + ArrayRef VarList, SourceLocation RParenLoc, + SourceLocation End); + + SourceLocation getLParenLoc() const { return ParensLoc.getBegin(); } + SourceLocation getRParenLoc() const { return ParensLoc.getEnd(); } + bool hasReadOnly() const { return !ReadOnlyLoc.isInvalid(); } + SourceLocation getReadOnlyLoc() const { return ReadOnlyLoc; } + + child_range children() { + Stmt **Begin = reinterpret_cast(getVarListPtr()); + return child_range(Begin, Begin + NumVars); + } + + const_child_range children() 
const { + Stmt *const *Begin = reinterpret_cast(getVarListPtr()); + return const_child_range(Begin, Begin + NumVars); + } +}; + // This class represents an 'init' construct, which has just a clause list. class OpenACCInitConstruct final : public OpenACCConstructStmt, diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h index 81844db2c77fa..ad2d4a7a973b3 100644 --- a/clang/include/clang/AST/TextNodeDumper.h +++ b/clang/include/clang/AST/TextNodeDumper.h @@ -422,7 +422,10 @@ class TextNodeDumper void VisitOpenACCShutdownConstruct(const OpenACCShutdownConstruct *S); void VisitOpenACCUpdateConstruct(const OpenACCUpdateConstruct *S); void VisitOpenACCAtomicConstruct(const OpenACCAtomicConstruct *S); + void VisitOpenACCCacheConstruct(const OpenACCCacheConstruct *S); void VisitOpenACCAsteriskSizeExpr(const OpenACCAsteriskSizeExpr *S); + void VisitOpenACCDeclareDecl(const OpenACCDeclareDecl *D); + void VisitOpenACCRoutineDecl(const OpenACCRoutineDecl *D); void VisitEmbedExpr(const EmbedExpr *S); void VisitAtomicExpr(const AtomicExpr *AE); void VisitConvertVectorExpr(const ConvertVectorExpr *S); diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index c3ff7ebd88516..ef59bd1621fb8 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -1943,11 +1943,6 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { LLVM_PREFERRED_TYPE(TypeBitfields) unsigned : NumTypeBits; - /// Extra information which affects how the function is called, like - /// regparm and the calling convention. - LLVM_PREFERRED_TYPE(CallingConv) - unsigned ExtInfo : 13; - /// The ref-qualifier associated with a \c FunctionProtoType. /// /// This is a value of type \c RefQualifierKind. @@ -1966,12 +1961,6 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { LLVM_PREFERRED_TYPE(bool) unsigned HasExtQuals : 1; - /// The number of parameters this function has, not counting '...'. 
- /// According to [implimits] 8 bits should be enough here but this is - /// somewhat easy to exceed with metaprogramming and so we would like to - /// keep NumParams as wide as reasonably possible. - unsigned NumParams : FunctionTypeNumParamsWidth; - /// The type of exception specification this function has. LLVM_PREFERRED_TYPE(ExceptionSpecificationType) unsigned ExceptionSpecType : 4; @@ -1991,6 +1980,17 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { /// Whether this function has a trailing return type. LLVM_PREFERRED_TYPE(bool) unsigned HasTrailingReturn : 1; + + /// Extra information which affects how the function is called, like + /// regparm and the calling convention. + LLVM_PREFERRED_TYPE(CallingConv) + unsigned ExtInfo : 14; + + /// The number of parameters this function has, not counting '...'. + /// According to [implimits] 8 bits should be enough here but this is + /// somewhat easy to exceed with metaprogramming and so we would like to + /// keep NumParams as wide as reasonably possible. + unsigned NumParams : FunctionTypeNumParamsWidth; }; class ObjCObjectTypeBitfields { @@ -2661,6 +2661,7 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { bool isHLSLSpecificType() const; // Any HLSL specific type bool isHLSLBuiltinIntangibleType() const; // Any HLSL builtin intangible type bool isHLSLAttributedResourceType() const; + bool isHLSLResourceRecord() const; bool isHLSLIntangibleType() const; // Any HLSL intangible type (builtin, array, class) @@ -4438,19 +4439,16 @@ class FunctionType : public Type { // Type::FunctionTypeBitfields::ExtInfo as well. // | CC |noreturn|produces|nocallersavedregs|regparm|nocfcheck|cmsenscall| - // |0 .. 4| 5 | 6 | 7 |8 .. 10| 11 | 12 | + // |0 .. 5| 6 | 7 | 8 |9 .. 11| 12 | 13 | // // regparm is either 0 (no regparm attribute) or the regparm value+1. 
- enum { CallConvMask = 0x1F }; - enum { NoReturnMask = 0x20 }; - enum { ProducesResultMask = 0x40 }; - enum { NoCallerSavedRegsMask = 0x80 }; - enum { - RegParmMask = 0x700, - RegParmOffset = 8 - }; - enum { NoCfCheckMask = 0x800 }; - enum { CmseNSCallMask = 0x1000 }; + enum { CallConvMask = 0x3F }; + enum { NoReturnMask = 0x40 }; + enum { ProducesResultMask = 0x80 }; + enum { NoCallerSavedRegsMask = 0x100 }; + enum { RegParmMask = 0xe00, RegParmOffset = 9 }; + enum { NoCfCheckMask = 0x1000 }; + enum { CmseNSCallMask = 0x2000 }; uint16_t Bits = CC_C; ExtInfo(unsigned Bits) : Bits(static_cast(Bits)) {} diff --git a/clang/include/clang/AST/VTableBuilder.h b/clang/include/clang/AST/VTableBuilder.h index a5de41dbc22f1..e1efe8cddcc5e 100644 --- a/clang/include/clang/AST/VTableBuilder.h +++ b/clang/include/clang/AST/VTableBuilder.h @@ -150,7 +150,7 @@ class VTableComponent { bool isRTTIKind() const { return isRTTIKind(getKind()); } - GlobalDecl getGlobalDecl() const { + GlobalDecl getGlobalDecl(bool HasVectorDeletingDtors) const { assert(isUsedFunctionPointerKind() && "GlobalDecl can be created only from virtual function"); @@ -161,7 +161,9 @@ class VTableComponent { case CK_CompleteDtorPointer: return GlobalDecl(DtorDecl, CXXDtorType::Dtor_Complete); case CK_DeletingDtorPointer: - return GlobalDecl(DtorDecl, CXXDtorType::Dtor_Deleting); + return GlobalDecl(DtorDecl, (HasVectorDeletingDtors) + ? CXXDtorType::Dtor_VectorDeleting + : CXXDtorType::Dtor_Deleting); case CK_VCallOffset: case CK_VBaseOffset: case CK_OffsetToTop: diff --git a/clang/include/clang/Basic/ABI.h b/clang/include/clang/Basic/ABI.h index 231bad799a42c..48969e4f295c3 100644 --- a/clang/include/clang/Basic/ABI.h +++ b/clang/include/clang/Basic/ABI.h @@ -31,10 +31,11 @@ enum CXXCtorType { /// C++ destructor types. 
enum CXXDtorType { - Dtor_Deleting, ///< Deleting dtor - Dtor_Complete, ///< Complete object dtor - Dtor_Base, ///< Base object dtor - Dtor_Comdat ///< The COMDAT used for dtors + Dtor_Deleting, ///< Deleting dtor + Dtor_Complete, ///< Complete object dtor + Dtor_Base, ///< Base object dtor + Dtor_Comdat, ///< The COMDAT used for dtors + Dtor_VectorDeleting ///< Vector deleting dtor }; } // end namespace clang diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 80a51c92cc520..ccfe69d32e0a6 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2232,8 +2232,8 @@ def RISCVInterrupt : InheritableAttr, TargetSpecificAttr { let Spellings = [GCC<"interrupt">]; let Subjects = SubjectList<[Function]>; let Args = [EnumArgument<"Interrupt", "InterruptType", /*is_string=*/true, - ["supervisor", "machine"], - ["supervisor", "machine"], + ["supervisor", "machine", "qci-nest", "qci-nonest"], + ["supervisor", "machine", "qcinest", "qcinonest"], 1>]; let ParseKind = "Interrupt"; let Documentation = [RISCVInterruptDocs]; @@ -3316,6 +3316,14 @@ def RISCVVectorCC: DeclOrTypeAttr, TargetSpecificAttr { let Documentation = [RISCVVectorCCDocs]; } +def RISCVVLSCC: DeclOrTypeAttr, TargetSpecificAttr { + let Spellings = [CXX11<"riscv", "vls_cc">, + C23<"riscv", "vls_cc">, + Clang<"riscv_vls_cc">]; + let Args = [UnsignedArgument<"VectorWidth", /*opt*/1>]; + let Documentation = [RISCVVLSCCDocs]; +} + def Target : InheritableAttr { let Spellings = [GCC<"target">]; let Args = [StringArgument<"featuresStr">]; @@ -5018,3 +5026,11 @@ def Atomic : StmtAttr { let Documentation = [AtomicDocs]; let StrictEnumParameters = 1; } + +def OpenACCRoutineAnnot : InheritableAttr { + // This attribute is used to mark that a function is targeted by a `routine` + // directive, so it doesn't have a spelling and is always implicit. 
+ let Spellings = []; + let Subjects = SubjectList<[Function]>; + let Documentation = [InternalOnly]; +} diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index d6d43df44fb21..fdc58d1c92c0d 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2828,8 +2828,17 @@ targets. This attribute may be attached to a function definition and instructs the backend to generate appropriate function entry/exit code so that it can be used directly as an interrupt service routine. -Permissible values for this parameter are ``user``, ``supervisor``, -and ``machine``. If there is no parameter, then it defaults to machine. +Permissible values for this parameter are ``supervisor``, ``machine``, +``qci-nest`` and ``qci-nonest``. If there is no parameter, then it defaults to +``machine``. + +The ``qci-nest`` and ``qci-nonest`` values require Qualcomm's Xqciint extension +and are used for Machine-mode Interrupts and Machine-mode Non-maskable +interrupts. These use the following instructions from Xqciint to save and +restore interrupt state to the stack -- the ``qci-nest`` value will use +``qc.c.mienter.nest`` and the ``qci-nonest`` value will use ``qc.c.mienter`` to +begin the interrupt handler. Both of these will use ``qc.c.mileaveret`` to +restore the state and return to the previous context. Repeated interrupt attribute on the same declaration will cause a warning to be emitted. In case of repeated declarations, the last one prevails. @@ -2839,6 +2848,7 @@ https://gcc.gnu.org/onlinedocs/gcc/RISC-V-Function-Attributes.html https://riscv.org/specifications/privileged-isa/ The RISC-V Instruction Set Manual Volume II: Privileged Architecture Version 1.10. +https://github.com/quic/riscv-unified-db/releases/tag/Xqci-0.6 }]; } @@ -6214,6 +6224,17 @@ them if they use them. 
}]; } +def RISCVVLSCCDocs : Documentation { + let Category = DocCatCallingConvs; + let Heading = "riscv::vls_cc, riscv_vls_cc, clang::riscv_vls_cc"; + let Content = [{ +The ``riscv_vls_cc`` attribute can be applied to a function. Functions +declared with this attribute will utilize the standard fixed-length vector +calling convention variant instead of the default calling convention defined by +the ABI. This variant aims to pass fixed-length vectors via vector registers, +if possible, rather than through general-purpose registers.}]; +} + def PreferredNameDocs : Documentation { let Category = DocCatDecl; let Content = [{ diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 598ae171b1389..2268df70927a7 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4765,6 +4765,12 @@ def GetDeviceSideMangledName : LangBuiltin<"CUDA_LANG"> { } // HLSL +def HLSLAddUint64: LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_adduint64"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(...)"; +} + def HLSLResourceGetPointer : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_resource_getpointer"]; let Attributes = [NoThrow]; @@ -4783,6 +4789,12 @@ def HLSLAnd : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } +def HLSLOr : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_or"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(...)"; +} + def HLSLAny : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_any"]; let Attributes = [NoThrow, Const]; diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 6d00862dde5ed..44ef404aee72f 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -487,9 +487,6 @@ TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal, "vIi", "n", "gfx12-insts") 
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_var, "vv*i", "n", "gfx12-insts") TARGET_BUILTIN(__builtin_amdgcn_s_barrier_wait, "vIs", "n", "gfx12-insts") TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_isfirst, "bIi", "n", "gfx12-insts") -TARGET_BUILTIN(__builtin_amdgcn_s_barrier_init, "vv*i", "n", "gfx12-insts") -TARGET_BUILTIN(__builtin_amdgcn_s_barrier_join, "vv*", "n", "gfx12-insts") -TARGET_BUILTIN(__builtin_amdgcn_s_barrier_leave, "vIs", "n", "gfx12-insts") TARGET_BUILTIN(__builtin_amdgcn_s_get_barrier_state, "Uii", "n", "gfx12-insts") TARGET_BUILTIN(__builtin_amdgcn_s_get_named_barrier_state, "Uiv*", "n", "gfx12-insts") TARGET_BUILTIN(__builtin_amdgcn_s_prefetch_data, "vvC*Ui", "nc", "gfx12-insts") diff --git a/clang/include/clang/Basic/DeclNodes.td b/clang/include/clang/Basic/DeclNodes.td index 723113dc2486e..20debd67a31a5 100644 --- a/clang/include/clang/Basic/DeclNodes.td +++ b/clang/include/clang/Basic/DeclNodes.td @@ -111,3 +111,5 @@ def Empty : DeclNode; def RequiresExprBody : DeclNode, DeclContext; def LifetimeExtendedTemporary : DeclNode; def HLSLBuffer : DeclNode, DeclContext; +def OpenACCDeclare : DeclNode; +def OpenACCRoutine : DeclNode; diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 77520447b813d..fac80fb4009aa 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -791,6 +791,8 @@ def ReservedIdAsMacroAlias : DiagGroup<"reserved-id-macro", [ReservedIdAsMacro]> def ReservedAttributeIdentifier : DiagGroup<"reserved-attribute-identifier">; def RestrictExpansionMacro : DiagGroup<"restrict-expansion">; def FinalMacro : DiagGroup<"final-macro">; +def UndefinedTrueIdentifier : DiagGroup<"undef-true">; +def UndefinedIdentifier : DiagGroup<"undef", [UndefinedTrueIdentifier]>; // Just silence warnings about -Wstrict-aliasing for now. 
def : DiagGroup<"strict-aliasing=0">; diff --git a/clang/include/clang/Basic/DiagnosticIDs.h b/clang/include/clang/Basic/DiagnosticIDs.h index b49185c3335d8..017ef7065610f 100644 --- a/clang/include/clang/Basic/DiagnosticIDs.h +++ b/clang/include/clang/Basic/DiagnosticIDs.h @@ -37,7 +37,7 @@ namespace clang { DIAG_SIZE_DRIVER = 400, DIAG_SIZE_FRONTEND = 200, DIAG_SIZE_SERIALIZATION = 120, - DIAG_SIZE_LEX = 400, + DIAG_SIZE_LEX = 500, DIAG_SIZE_PARSE = 700, DIAG_SIZE_AST = 300, DIAG_SIZE_COMMENT = 100, diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 2b1cc81677b08..0e5592d65669b 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -392,7 +392,10 @@ def pp_macro_not_used : Warning<"macro is not used">, DefaultIgnore, InGroup>; def warn_pp_undef_identifier : Warning< "%0 is not defined, evaluates to 0">, - InGroup>, DefaultIgnore; + InGroup, DefaultIgnore; +def warn_pp_undef_true_identifier : Warning< + "'true' is not defined, evaluates to 0">, + InGroup; def warn_pp_undef_prefix : Warning< "%0 is not defined, evaluates to 0">, InGroup>, DefaultIgnore; @@ -715,6 +718,8 @@ def warn_pragma_debug_unexpected_command : Warning< "unexpected debug command '%0'">, InGroup; def warn_pragma_debug_unknown_module : Warning< "unknown module '%0'">, InGroup; +def warn_pragma_debug_unable_to_find_module : Warning< + "unable to find module '%0'">, InGroup; // #pragma module def err_pp_expected_module_name : Error< "expected %select{identifier after '.' 
in |}0module name">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index d094c075ecee2..1b46920e09619 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -551,7 +551,7 @@ def err_decomp_decl_constraint : Error< def err_decomp_decl_parens : Error< "decomposition declaration cannot be declared with parentheses">; def err_decomp_decl_template : Error< - "decomposition declaration template not supported">; + "decomposition declaration cannot be a template">; def err_decomp_decl_not_alone : Error< "decomposition declaration must be the only declaration in its group">; def err_decomp_decl_requires_init : Error< @@ -3343,6 +3343,9 @@ def err_attribute_argument_out_of_range : Error< def err_init_priority_object_attr : Error< "can only use 'init_priority' attribute on file-scope definitions " "of objects of class type">; +def warn_init_priority_reserved : Warning< + "requested 'init_priority' %0 is reserved for internal use">, + InGroup>, DefaultError; def err_attribute_argument_out_of_bounds : Error< "%0 attribute parameter %1 is out of bounds">; def err_attribute_only_once_per_parameter : Error< @@ -5207,16 +5210,11 @@ def err_template_unnamed_class : Error< def err_template_param_list_different_arity : Error< "%select{too few|too many}0 template parameters in template " "%select{|template parameter }1redeclaration">; -def note_template_param_list_different_arity : Note< - "%select{too few|too many}0 template parameters in template template " - "argument">; def note_template_prev_declaration : Note< "previous template %select{declaration|template parameter}0 is here">; def err_template_param_different_kind : Error< "template parameter has a different kind in template " "%select{|template parameter }0redeclaration">; -def note_template_param_different_kind : Note< - "template parameter has a different kind in template argument">; def 
err_invalid_decl_specifier_in_nontype_parm : Error< "invalid declaration specifier in template non-type parameter">; @@ -5225,8 +5223,6 @@ def err_template_nontype_parm_different_type : Error< "template non-type parameter has a different type %0 in template " "%select{|template parameter }1redeclaration">; -def note_template_nontype_parm_different_type : Note< - "template non-type parameter has a different type %0 in template argument">; def note_template_nontype_parm_prev_declaration : Note< "previous non-type template parameter with type %0 is here">; def err_template_nontype_parm_bad_type : Error< @@ -5317,10 +5313,15 @@ def err_template_missing_args : Error< "%select{class template|function template|variable template|alias template|" "template template parameter|concept|template}0 %1 requires template " "arguments">; -def err_template_arg_list_different_arity : Error< - "%select{too few|too many}0 template arguments for " +def err_template_param_missing_arg : Error< + "missing template argument for template parameter">; +def err_template_template_param_missing_param : Error< + "no template parameter in this template template parameter " + "corresponds to non-defaulted template parameter of argument template">; +def err_template_too_many_args : Error< + "too many template arguments for " "%select{class template|function template|variable template|alias template|" - "template template parameter|concept|template}1 %2">; + "template template parameter|concept|template}0 %1">; def note_template_decl_here : Note<"template is declared here">; def note_template_decl_external : Note< "template declaration from hidden source: %0">; @@ -5358,11 +5359,8 @@ def err_template_arg_not_valid_template : Error< "template parameter">; def note_template_arg_refers_here_func : Note< "template argument refers to function template %0, here">; -def err_template_arg_template_params_mismatch : Error< - "template template argument has different template parameters than its " - 
"corresponding template template parameter">; def note_template_arg_template_params_mismatch : Note< - "template template argument has different template parameters than its " + "template template argument is incompatible with its " "corresponding template template parameter">; def err_non_deduced_mismatch : Error< "could not match %diff{$ against $|types}0,1">; @@ -5924,10 +5922,6 @@ def err_template_parameter_pack_non_pack : Error< "%select{template type|non-type template|template template}0 parameter" "%select{| pack}1 conflicts with previous %select{template type|" "non-type template|template template}0 parameter%select{ pack|}1">; -def note_template_parameter_pack_non_pack : Note< - "%select{template type|non-type template|template template}0 parameter" - "%select{| pack}1 does not match %select{template type|non-type template" - "|template template}0 parameter%select{ pack|}1 in template argument">; def note_template_parameter_pack_here : Note< "previous %select{template type|non-type template|template template}0 " "parameter%select{| pack}1 declared here">; @@ -7138,6 +7132,9 @@ def warn_shift_result_sets_sign_bit : Warning< "signed shift result (%0) sets the sign bit of the shift expression's " "type (%1) and becomes negative">, InGroup>, DefaultIgnore; +def warn_shift_bool : Warning< + "right shifting a 'bool' implicitly converts it to 'int'">, + InGroup>, DefaultIgnore; def warn_precedence_bitwise_rel : Warning< "%0 has lower precedence than %1; %1 will be evaluated first">, @@ -7157,7 +7154,7 @@ def note_precedence_conditional_first : Note< "place parentheses around the '?:' expression to evaluate it first">; def warn_consecutive_comparison : Warning< - "comparisons like 'X<=Y<=Z' don't have their mathematical meaning">, + "chained comparison 'X %0 Y %1 Z' does not behave the same as a mathematical expression">, InGroup, DefaultError; def warn_enum_constant_in_bool_context : Warning< @@ -10706,6 +10703,11 @@ def err_vector_incorrect_num_elements : Error< 
"%select{too many|too few}0 elements in vector %select{initialization|operand}3 (expected %1 elements, have %2)">; def err_altivec_empty_initializer : Error<"expected initializer">; +def err_vector_incorrect_bit_count : Error< + "incorrect number of bits in vector operand (expected %select{|a multiple of}0 %1 bits, have %2)">; +def err_integer_incorrect_bit_count : Error< + "incorrect number of bits in integer (expected %0 bits, have %1)">; + def err_invalid_neon_type_code : Error< "incompatible constant for this __builtin_neon function">; def err_argument_invalid_range : Error< @@ -10794,6 +10796,23 @@ def err_non_local_variable_decl_in_for : Error< "declaration of non-local variable in 'for' loop">; def err_non_variable_decl_in_for : Error< "non-variable declaration in 'for' loop">; + +def ext_c23_non_local_variable_decl_in_for : Extension< + "declaration of non-local variable in 'for' loop is a C23 extension">, + InGroup; + +def warn_c17_non_local_variable_decl_in_for : Warning< + "declaration of non-local variable in 'for' loop is incompatible with C standards before C23">, + DefaultIgnore, InGroup; + +def ext_c23_non_variable_decl_in_for : Extension< + "non-variable declaration in 'for' loop is a C23 extension">, + InGroup; + +def warn_c17_non_variable_decl_in_for : Warning< + "non-variable declaration in 'for' loop is incompatible with C standards before C23">, + DefaultIgnore, InGroup; + def err_toomany_element_decls : Error< "only one element declaration is allowed">; def err_selector_element_not_lvalue : Error< @@ -12621,8 +12640,9 @@ def err_riscv_builtin_requires_extension : Error< def err_riscv_builtin_invalid_lmul : Error< "LMUL argument must be in the range [0,3] or [5,7]">; def err_riscv_type_requires_extension : Error< - "RISC-V type %0 requires the '%1' extension" ->; + "RISC-V type %0 requires the '%1' extension">; +def err_riscv_attribute_interrupt_requires_extension : Error< + "RISC-V interrupt attribute '%0' requires extension '%1'">; def 
err_std_source_location_impl_not_found : Error< "'std::source_location::__impl' was not found; it must be defined before '__builtin_source_location' is called">; @@ -12796,8 +12816,9 @@ def err_wasm_builtin_arg_must_be_integer_type : Error < "%ordinal0 argument must be an integer">; // OpenACC diagnostics. -def warn_acc_construct_unimplemented - : Warning<"OpenACC construct '%0' not yet implemented, pragma ignored">, +def warn_acc_routine_unimplemented + : Warning<"OpenACC construct 'routine' with implicit function not yet " + "implemented, pragma ignored">, InGroup; def warn_acc_clause_unimplemented : Warning<"OpenACC clause '%0' not yet implemented, clause ignored">, @@ -12812,6 +12833,7 @@ def err_acc_duplicate_clause_disallowed "directive">; def note_acc_previous_clause_here : Note<"previous clause is here">; def note_acc_previous_expr_here : Note<"previous expression is here">; +def note_acc_previous_reference : Note<"previous reference is here">; def err_acc_branch_in_out_compute_construct : Error<"invalid %select{branch|return|throw}0 %select{out of|into}1 " "OpenACC Compute/Combined Construct">; @@ -12844,9 +12866,12 @@ def err_acc_not_a_var_ref : Error<"OpenACC variable is not a valid variable name, sub-array, array " "element,%select{| member of a composite variable,}0 or composite " "variable member">; -def err_acc_not_a_var_ref_use_device - : Error<"OpenACC variable in 'use_device' clause is not a valid variable " - "name or array name">; +def err_acc_not_a_var_ref_use_device_declare + : Error<"OpenACC variable %select{in 'use_device' clause|on 'declare' " + "construct}0 is not a valid variable name or array name">; +def err_acc_not_a_var_ref_cache + : Error<"OpenACC variable in cache directive is not a valid sub-array or " + "array element">; def err_acc_typecheck_subarray_value : Error<"OpenACC sub-array subscripted value is not an array or pointer">; def err_acc_subarray_function_type @@ -13014,6 +13039,30 @@ def 
note_acc_atomic_mismatch_compound_operand "side of assignment||on left hand side of compound " "assignment|on left hand side of assignment}0('%1') from the first " "statement">; +def err_acc_declare_required_clauses + : Error<"no valid clauses specified in OpenACC 'declare' directive">; +def err_acc_declare_clause_at_global + : Error<"OpenACC '%0' clause on a 'declare' directive is not allowed at " + "global or namespace scope">; +def err_acc_link_not_extern + : Error<"variable referenced by 'link' clause not in global or namespace " + "scope must be marked 'extern'">; +def err_acc_declare_extern + : Error<"'extern' variable may not be referenced by '%0' clause on an " + "OpenACC 'declare' directive">; +def err_acc_declare_same_scope + : Error<"variable appearing in '%0' clause of OpenACC 'declare' directive " + "must be in the same scope as the directive">; +def err_acc_multiple_references + : Error<"variable referenced in '%0' clause of OpenACC 'declare' directive " + "was already referenced">; +def err_acc_routine_not_func + : Error<"OpenACC routine name '%0' does not name a function">; +def err_acc_routine_overload_set + : Error<"OpenACC routine name '%0' names a set of overloads">; +def err_acc_magic_static_in_routine + : Error<"function static variables are not permitted in functions to which " + "an OpenACC 'routine' directive applies">; // AMDGCN builtins diagnostics def err_amdgcn_global_load_lds_size_invalid_value : Error<"invalid size value">; diff --git a/clang/include/clang/Basic/OpenACCClauses.def b/clang/include/clang/Basic/OpenACCClauses.def index 8b15007c85557..f04965363f25e 100644 --- a/clang/include/clang/Basic/OpenACCClauses.def +++ b/clang/include/clang/Basic/OpenACCClauses.def @@ -44,6 +44,7 @@ VISIT_CLAUSE(Detach) VISIT_CLAUSE(Device) VISIT_CLAUSE(DeviceNum) VISIT_CLAUSE(DevicePtr) +VISIT_CLAUSE(DeviceResident) VISIT_CLAUSE(DeviceType) CLAUSE_ALIAS(DType, DeviceType, false) VISIT_CLAUSE(Finalize) @@ -53,7 +54,9 @@ VISIT_CLAUSE(Host) 
VISIT_CLAUSE(If) VISIT_CLAUSE(IfPresent) VISIT_CLAUSE(Independent) +VISIT_CLAUSE(Link) VISIT_CLAUSE(NoCreate) +VISIT_CLAUSE(NoHost) VISIT_CLAUSE(NumGangs) VISIT_CLAUSE(NumWorkers) VISIT_CLAUSE(Present) diff --git a/clang/include/clang/Basic/Sanitizers.def b/clang/include/clang/Basic/Sanitizers.def index f234488eaa80c..1d0e97cc7fb4c 100644 --- a/clang/include/clang/Basic/Sanitizers.def +++ b/clang/include/clang/Basic/Sanitizers.def @@ -152,8 +152,7 @@ SANITIZER_GROUP("undefined", Undefined, FloatCastOverflow | IntegerDivideByZero | NonnullAttribute | Null | ObjectSize | PointerOverflow | Return | ReturnsNonnullAttribute | Shift | - SignedIntegerOverflow | Unreachable | VLABound | Function | - Vptr) + SignedIntegerOverflow | Unreachable | VLABound | Function) // -fsanitize=undefined-trap is an alias for -fsanitize=undefined. SANITIZER_GROUP("undefined-trap", UndefinedTrap, Undefined) diff --git a/clang/include/clang/Basic/Specifiers.h b/clang/include/clang/Basic/Specifiers.h index 9c089908fdc13..491badcc804e7 100644 --- a/clang/include/clang/Basic/Specifiers.h +++ b/clang/include/clang/Basic/Specifiers.h @@ -276,30 +276,43 @@ namespace clang { /// CallingConv - Specifies the calling convention that a function uses. 
enum CallingConv { - CC_C, // __attribute__((cdecl)) - CC_X86StdCall, // __attribute__((stdcall)) - CC_X86FastCall, // __attribute__((fastcall)) - CC_X86ThisCall, // __attribute__((thiscall)) - CC_X86VectorCall, // __attribute__((vectorcall)) - CC_X86Pascal, // __attribute__((pascal)) - CC_Win64, // __attribute__((ms_abi)) - CC_X86_64SysV, // __attribute__((sysv_abi)) - CC_X86RegCall, // __attribute__((regcall)) - CC_AAPCS, // __attribute__((pcs("aapcs"))) - CC_AAPCS_VFP, // __attribute__((pcs("aapcs-vfp"))) - CC_IntelOclBicc, // __attribute__((intel_ocl_bicc)) - CC_SpirFunction, // default for OpenCL functions on SPIR target - CC_OpenCLKernel, // inferred for OpenCL kernels - CC_Swift, // __attribute__((swiftcall)) - CC_SwiftAsync, // __attribute__((swiftasynccall)) - CC_PreserveMost, // __attribute__((preserve_most)) - CC_PreserveAll, // __attribute__((preserve_all)) - CC_AArch64VectorCall, // __attribute__((aarch64_vector_pcs)) - CC_AArch64SVEPCS, // __attribute__((aarch64_sve_pcs)) - CC_AMDGPUKernelCall, // __attribute__((amdgpu_kernel)) - CC_M68kRTD, // __attribute__((m68k_rtd)) - CC_PreserveNone, // __attribute__((preserve_none)) - CC_RISCVVectorCall, // __attribute__((riscv_vector_cc)) + CC_C, // __attribute__((cdecl)) + CC_X86StdCall, // __attribute__((stdcall)) + CC_X86FastCall, // __attribute__((fastcall)) + CC_X86ThisCall, // __attribute__((thiscall)) + CC_X86VectorCall, // __attribute__((vectorcall)) + CC_X86Pascal, // __attribute__((pascal)) + CC_Win64, // __attribute__((ms_abi)) + CC_X86_64SysV, // __attribute__((sysv_abi)) + CC_X86RegCall, // __attribute__((regcall)) + CC_AAPCS, // __attribute__((pcs("aapcs"))) + CC_AAPCS_VFP, // __attribute__((pcs("aapcs-vfp"))) + CC_IntelOclBicc, // __attribute__((intel_ocl_bicc)) + CC_SpirFunction, // default for OpenCL functions on SPIR target + CC_OpenCLKernel, // inferred for OpenCL kernels + CC_Swift, // __attribute__((swiftcall)) + CC_SwiftAsync, // __attribute__((swiftasynccall)) + CC_PreserveMost, // 
__attribute__((preserve_most)) + CC_PreserveAll, // __attribute__((preserve_all)) + CC_AArch64VectorCall, // __attribute__((aarch64_vector_pcs)) + CC_AArch64SVEPCS, // __attribute__((aarch64_sve_pcs)) + CC_AMDGPUKernelCall, // __attribute__((amdgpu_kernel)) + CC_M68kRTD, // __attribute__((m68k_rtd)) + CC_PreserveNone, // __attribute__((preserve_none)) + CC_RISCVVectorCall, // __attribute__((riscv_vector_cc)) + CC_RISCVVLSCall_32, // __attribute__((riscv_vls_cc(32))) + CC_RISCVVLSCall_64, // __attribute__((riscv_vls_cc(64))) + CC_RISCVVLSCall_128, // __attribute__((riscv_vls_cc)) or + // __attribute__((riscv_vls_cc(128))) + CC_RISCVVLSCall_256, // __attribute__((riscv_vls_cc(256))) + CC_RISCVVLSCall_512, // __attribute__((riscv_vls_cc(512))) + CC_RISCVVLSCall_1024, // __attribute__((riscv_vls_cc(1024))) + CC_RISCVVLSCall_2048, // __attribute__((riscv_vls_cc(2048))) + CC_RISCVVLSCall_4096, // __attribute__((riscv_vls_cc(4096))) + CC_RISCVVLSCall_8192, // __attribute__((riscv_vls_cc(8192))) + CC_RISCVVLSCall_16384, // __attribute__((riscv_vls_cc(16384))) + CC_RISCVVLSCall_32768, // __attribute__((riscv_vls_cc(32768))) + CC_RISCVVLSCall_65536, // __attribute__((riscv_vls_cc(65536))) }; /// Checks whether the given calling convention supports variadic diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index ae49671058a01..9526fa5808aa5 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -320,6 +320,7 @@ def OpenACCShutdownConstruct : StmtNode; def OpenACCSetConstruct : StmtNode; def OpenACCUpdateConstruct : StmtNode; def OpenACCAtomicConstruct : StmtNode; +def OpenACCCacheConstruct : StmtNode; // OpenACC Additional Expressions. 
def OpenACCAsteriskSizeExpr : StmtNode; diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 291cf26cb2e78..d136b459e9cd4 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -253,6 +253,7 @@ class TargetInfo : public TransferrableTargetInfo, const char *MCountName; unsigned char RegParmMax, SSERegParmMax; TargetCXXABI TheCXXABI; + bool UseMicrosoftManglingForC = false; const LangASMap *AddrSpaceMap; mutable StringRef PlatformName; @@ -1344,6 +1345,11 @@ class TargetInfo : public TransferrableTargetInfo, return TheCXXABI; } + /// Should the Microsoft mangling scheme be used for C Calling Convention. + bool shouldUseMicrosoftCCforMangling() const { + return UseMicrosoftManglingForC; + } + /// Target the specified CPU. /// /// \return False on error (invalid CPU name). diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 3afbba51bd138..b51106fa56759 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -19,7 +19,7 @@ include "arm_sve_sme_incl.td" // Loads // Load one vector (scalar base) -def SVLD1 : MInst<"svld1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; +def SVLD1 : MInst<"svld1[_{2}]", "dPc", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; def SVLD1SB : MInst<"svld1sb_{d}", "dPS", "silUsUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; def SVLD1UB : MInst<"svld1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; def SVLD1SH : MInst<"svld1sh_{d}", "dPT", "ilUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_ld1">; @@ -33,7 +33,7 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { } // Load one vector (scalar base, VL displacement) -def SVLD1_VNUM : MInst<"svld1_vnum[_{2}]", "dPcl", 
"csilUcUsUiUlhfd", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; +def SVLD1_VNUM : MInst<"svld1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; def SVLD1SB_VNUM : MInst<"svld1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; def SVLD1UB_VNUM : MInst<"svld1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; def SVLD1SH_VNUM : MInst<"svld1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_ld1">; @@ -247,10 +247,10 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = InvalidMode in { } // Load one vector, unextended load, non-temporal (scalar base) -def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; +def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; // Load one vector, unextended load, non-temporal (scalar base, VL displacement) -def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; +def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { def SVLDNT1_BF : MInst<"svldnt1[_{2}]", "dPc", "b", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; @@ -265,7 +265,7 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { } multiclass StructLoad { - def : SInst; + def : SInst; let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { def: SInst; } @@ -314,11 +314,11 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in { def SVLD1UDQ_VNUM : MInst<"svld1udq_vnum[_{d}]", "dPcl", "lUld", [IsLoad], MemEltTyInt64, 
"aarch64_sve_ld1udq">; // Load one vector (vector base + scalar offset) - def SVLD1Q_GATHER_U64BASE_OFFSET : MInst<"svld1q_gather[_{2}base]_offset_{d}", "dPgl", "cUcsUsiUilUlfhdb", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; - def SVLD1Q_GATHER_U64BASE : MInst<"svld1q_gather[_{2}base]_{d}", "dPg", "cUcsUsiUilUlfhdb", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; + def SVLD1Q_GATHER_U64BASE_OFFSET : MInst<"svld1q_gather[_{2}base]_offset_{d}", "dPgl", "cUcsUsiUilUlfhdbm", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; + def SVLD1Q_GATHER_U64BASE : MInst<"svld1q_gather[_{2}base]_{d}", "dPg", "cUcsUsiUilUlfhdbm", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; // Load one vector (scalar base + vector offset) - def SVLD1Q_GATHER_U64OFFSET : MInst<"svld1q_gather_[{3}]offset[_{d}]", "dPcg", "cUcsUsiUilUlfhdb", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_vector_offset">; + def SVLD1Q_GATHER_U64OFFSET : MInst<"svld1q_gather_[{3}]offset[_{d}]", "dPcg", "cUcsUsiUilUlfhdbm", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_vector_offset">; // Load N-element structure into N vectors (scalar base) defm SVLD2Q : StructLoad<"svld2q[_{2}]", "2Pc", "aarch64_sve_ld2q_sret">; @@ -341,7 +341,7 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in { // Stores // Store one vector (scalar base) -def SVST1 : MInst<"svst1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; +def SVST1 : MInst<"svst1[_{d}]", "vPpd", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; def SVST1B_S : MInst<"svst1b[_{d}]", "vPAd", "sil", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; def SVST1B_U : MInst<"svst1b[_{d}]", "vPEd", "UsUiUl", [IsStore, VerifyRuntimeMode], 
MemEltTyInt8, "aarch64_sve_st1">; def SVST1H_S : MInst<"svst1h[_{d}]", "vPBd", "il", [IsStore, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_st1">; @@ -350,7 +350,7 @@ def SVST1W_S : MInst<"svst1w[_{d}]", "vPCd", "l", [IsStore, Verify def SVST1W_U : MInst<"svst1w[_{d}]", "vPGd", "Ul", [IsStore, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_st1">; // Store one vector (scalar base, VL displacement) -def SVST1_VNUM : MInst<"svst1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; +def SVST1_VNUM : MInst<"svst1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; def SVST1B_VNUM_S : MInst<"svst1b_vnum[_{d}]", "vPAld", "sil", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; def SVST1B_VNUM_U : MInst<"svst1b_vnum[_{d}]", "vPEld", "UsUiUl", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; def SVST1H_VNUM_S : MInst<"svst1h_vnum[_{d}]", "vPBld", "il", [IsStore, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_st1">; @@ -435,7 +435,7 @@ def SVST1W_SCATTER_INDEX_S : MInst<"svst1w_scatter[_{2}base]_index[_{d}]", "v } // let SVETargetGuard = "sve" multiclass StructStore { - def : SInst; + def : SInst; let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { def: SInst; } @@ -451,10 +451,10 @@ defm SVST3_VNUM : StructStore<"svst3_vnum[_{d}]", "vPpl3", "aarch64_sve_st3">; defm SVST4_VNUM : StructStore<"svst4_vnum[_{d}]", "vPpl4", "aarch64_sve_st4">; // Store one vector, with no truncation, non-temporal (scalar base) -def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; +def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; // Store one vector, with no truncation, non-temporal (scalar base, VL displacement) -def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", 
"csilUcUsUiUlhfd", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; +def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { def SVSTNT1_BF : MInst<"svstnt1[_{d}]", "vPpd", "b", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; @@ -470,12 +470,12 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in { def SVST1UDQ_VNUM : MInst<"svst1dq_vnum[_{d}]", "vPpld", "lUld", [IsStore], MemEltTyInt64, "aarch64_sve_st1dq">; // Store one vector (vector base + scalar offset) - def SVST1Q_SCATTER_U64BASE_OFFSET : MInst<"svst1q_scatter[_{2}base]_offset[_{d}]", "vPgld", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; - def SVST1Q_SCATTER_U64BASE : MInst<"svst1q_scatter[_{2}base][_{d}]", "vPgd", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; + def SVST1Q_SCATTER_U64BASE_OFFSET : MInst<"svst1q_scatter[_{2}base]_offset[_{d}]", "vPgld", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; + def SVST1Q_SCATTER_U64BASE : MInst<"svst1q_scatter[_{2}base][_{d}]", "vPgd", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; // Store one vector (scalar base + vector offset) - def SVST1Q_SCATTER_OFFSETS_U : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPpgd", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">; - def SVST1Q_SCATTER_OFFSETS_S : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPp#d", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">; + def SVST1Q_SCATTER_OFFSETS_U : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPpgd", 
"cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">; + def SVST1Q_SCATTER_OFFSETS_S : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPp#d", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">; // Store N vectors into N-element structure (scalar base) defm SVST2Q : StructStore<"svst2q[_{d}]", "vPc2", "aarch64_sve_st2q">; @@ -2042,20 +2042,20 @@ def SVWHILEHS_COUNT : SInst<"svwhilege_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNo } multiclass MultiVecLoad { - def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "cUc", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "cUc", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # D_X4 : MInst<"sv" # i # 
"[_{2}]_x4", "4}c", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "cUc", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "cUc", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; @@ -2067,20 +2067,20 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in { } multiclass MultiVecStore { - def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "cUc", [IsStructStore, 
VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "cUc", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "cUc", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # 
W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "cUc", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index 14afdfc2758ea..b65797e40d5f9 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -69,6 +69,11 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { return create(loc, ptr); } + cir::StoreOp createStore(mlir::Location loc, mlir::Value val, + mlir::Value dst) { + return create(loc, val, dst); + } + // // Block handling helpers // ---------------------- diff --git a/clang/include/clang/CIR/Dialect/IR/CIRAttrVisitor.h b/clang/include/clang/CIR/Dialect/IR/CIRAttrVisitor.h deleted file mode 100644 index bbba89cb7e3fd..0000000000000 --- a/clang/include/clang/CIR/Dialect/IR/CIRAttrVisitor.h +++ /dev/null 
@@ -1,52 +0,0 @@ -//===- CIRAttrVisitor.h - Visitor for CIR attributes ------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the CirAttrVisitor interface. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_CIR_DIALECT_IR_CIRATTRVISITOR_H -#define LLVM_CLANG_CIR_DIALECT_IR_CIRATTRVISITOR_H - -#include "clang/CIR/Dialect/IR/CIRAttrs.h" - -namespace cir { - -template class CirAttrVisitor { -public: - // FIXME: Create a TableGen list to automatically handle new attributes - RetTy visit(mlir::Attribute attr) { - if (const auto intAttr = mlir::dyn_cast(attr)) - return getImpl().visitCirIntAttr(intAttr); - if (const auto fltAttr = mlir::dyn_cast(attr)) - return getImpl().visitCirFPAttr(fltAttr); - if (const auto ptrAttr = mlir::dyn_cast(attr)) - return getImpl().visitCirConstPtrAttr(ptrAttr); - llvm_unreachable("unhandled attribute type"); - } - - // If the implementation chooses not to implement a certain visit - // method, fall back to the parent. 
- RetTy visitCirIntAttr(cir::IntAttr attr) { - return getImpl().visitCirAttr(attr); - } - RetTy visitCirFPAttr(cir::FPAttr attr) { - return getImpl().visitCirAttr(attr); - } - RetTy visitCirConstPtrAttr(cir::ConstPtrAttr attr) { - return getImpl().visitCirAttr(attr); - } - - RetTy visitCirAttr(mlir::Attribute attr) { return RetTy(); } - - ImplClass &getImpl() { return *static_cast(this); } -}; - -} // namespace cir - -#endif // LLVM_CLANG_CIR_DIALECT_IR_CIRATTRVISITOR_H diff --git a/clang/include/clang/CIR/Dialect/IR/CIRDialect.h b/clang/include/clang/CIR/Dialect/IR/CIRDialect.h index 683176b139ca4..0684cf5034f5d 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRDialect.h +++ b/clang/include/clang/CIR/Dialect/IR/CIRDialect.h @@ -28,6 +28,8 @@ #include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIROpsDialect.h.inc" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "clang/CIR/Interfaces/CIROpInterfaces.h" // TableGen'erated files for MLIR dialects require that a macro be defined when // they are included. GET_OP_CLASSES tells the file to define the classes for diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 083cf46a93ae6..e2ab50c78ec2d 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -18,6 +18,8 @@ include "clang/CIR/Dialect/IR/CIRDialect.td" include "clang/CIR/Dialect/IR/CIRTypes.td" include "clang/CIR/Dialect/IR/CIRAttrs.td" +include "clang/CIR/Interfaces/CIROpInterfaces.td" + include "mlir/IR/BuiltinAttributeInterfaces.td" include "mlir/IR/EnumAttr.td" include "mlir/IR/SymbolInterfaces.td" @@ -228,6 +230,45 @@ def LoadOp : CIR_Op<"load", [ // FIXME: add verifier. 
} +//===----------------------------------------------------------------------===// +// StoreOp +//===----------------------------------------------------------------------===// + +def StoreOp : CIR_Op<"store", [ + TypesMatchWith<"type of 'value' matches pointee type of 'addr'", + "addr", "value", + "cast($_self).getPointee()">, + DeclareOpInterfaceMethods]> { + + let summary = "Store value to memory address"; + let description = [{ + `cir.store` stores a value (first operand) to the memory address specified + in the second operand. A unit attribute `volatile` can be used to indicate + a volatile store. Store's can be marked atomic by using + `atomic()`. + + `align` can be used to specify an alignment that's different from the + default, which is computed from `result`'s type ABI data layout. + + Example: + + ```mlir + // Store a function argument to local storage, address in %0. + cir.store %arg0, %0 : i32, !cir.ptr + ``` + }]; + + let arguments = (ins CIR_AnyType:$value, + Arg:$addr); + + let assemblyFormat = [{ + $value `,` $addr attr-dict `:` type($value) `,` qualified(type($addr)) + }]; + + // FIXME: add verifier. +} + //===----------------------------------------------------------------------===// // ReturnOp //===----------------------------------------------------------------------===// @@ -391,6 +432,59 @@ def ScopeOp : CIR_Op<"scope", [ // GlobalOp //===----------------------------------------------------------------------===// +// Linkage types. This is currently a replay of llvm/IR/GlobalValue.h, this is +// currently handy as part of forwarding appropriate linkage types for LLVM +// lowering, specially useful for C++ support. + +// Externally visible function +def Global_ExternalLinkage : + I32EnumAttrCase<"ExternalLinkage", 0, "external">; +// Available for inspection, not emission. 
+def Global_AvailableExternallyLinkage : + I32EnumAttrCase<"AvailableExternallyLinkage", 1, "available_externally">; +// Keep one copy of function when linking (inline) +def Global_LinkOnceAnyLinkage : + I32EnumAttrCase<"LinkOnceAnyLinkage", 2, "linkonce">; +// Same, but only replaced by something equivalent. +def Global_LinkOnceODRLinkage : + I32EnumAttrCase<"LinkOnceODRLinkage", 3, "linkonce_odr">; +// Keep one copy of named function when linking (weak) +def Global_WeakAnyLinkage : + I32EnumAttrCase<"WeakAnyLinkage", 4, "weak">; +// Same, but only replaced by something equivalent. +def Global_WeakODRLinkage : + I32EnumAttrCase<"WeakODRLinkage", 5, "weak_odr">; +// TODO: should we add something like appending linkage too? +// Special purpose, only applies to global arrays +// def Global_AppendingLinkage : +// I32EnumAttrCase<"AppendingLinkage", 6, "appending">; +// Rename collisions when linking (static functions). +def Global_InternalLinkage : + I32EnumAttrCase<"InternalLinkage", 7, "internal">; +// Like Internal, but omit from symbol table, prefix it with +// "cir_" to prevent clash with MLIR's symbol "private". +def Global_PrivateLinkage : + I32EnumAttrCase<"PrivateLinkage", 8, "cir_private">; +// ExternalWeak linkage description. +def Global_ExternalWeakLinkage : + I32EnumAttrCase<"ExternalWeakLinkage", 9, "extern_weak">; +// Tentative definitions. +def Global_CommonLinkage : + I32EnumAttrCase<"CommonLinkage", 10, "common">; + +/// An enumeration for the kinds of linkage for global values. +def GlobalLinkageKind : I32EnumAttr< + "GlobalLinkageKind", + "Linkage type/kind", + [Global_ExternalLinkage, Global_AvailableExternallyLinkage, + Global_LinkOnceAnyLinkage, Global_LinkOnceODRLinkage, + Global_WeakAnyLinkage, Global_WeakODRLinkage, + Global_InternalLinkage, Global_PrivateLinkage, + Global_ExternalWeakLinkage, Global_CommonLinkage + ]> { + let cppNamespace = "::cir"; +} + // TODO(CIR): For starters, cir.global has only name and type. 
The other // properties of a global variable will be added over time as more of ClangIR // is upstreamed. @@ -402,12 +496,19 @@ def GlobalOp : CIR_Op<"global"> { The backing memory for the variable is allocated statically and is described by the type of the variable. + + The `linkage` tracks C/C++ linkage types, currently very similar to LLVM's. }]; - let arguments = (ins SymbolNameAttr:$sym_name, TypeAttr:$sym_type, - OptionalAttr:$initial_value); + let arguments = (ins SymbolNameAttr:$sym_name, + TypeAttr:$sym_type, + Arg:$linkage, + OptionalAttr:$initial_value, + UnitAttr:$dsolocal); let assemblyFormat = [{ + $linkage + (`dsolocal` $dsolocal^)? $sym_name custom($sym_type, $initial_value) attr-dict @@ -420,8 +521,12 @@ def GlobalOp : CIR_Op<"global"> { let skipDefaultBuilders = 1; - let builders = [OpBuilder<(ins "llvm::StringRef":$sym_name, - "mlir::Type":$sym_type)>]; + let builders = [OpBuilder<(ins + "llvm::StringRef":$sym_name, + "mlir::Type":$sym_type, + // CIR defaults to external linkage. + CArg<"cir::GlobalLinkageKind", + "cir::GlobalLinkageKind::ExternalLinkage">:$linkage)>]; let hasVerifier = 1; } diff --git a/clang/include/clang/CIR/Dialect/IR/CIROpsEnums.h b/clang/include/clang/CIR/Dialect/IR/CIROpsEnums.h new file mode 100644 index 0000000000000..fead5725d183d --- /dev/null +++ b/clang/include/clang/CIR/Dialect/IR/CIROpsEnums.h @@ -0,0 +1,118 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the CIR enumerations. 
+// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_CIR_DIALECT_IR_CIROPSENUMS_H +#define CLANG_CIR_DIALECT_IR_CIROPSENUMS_H + +#include "mlir/IR/BuiltinAttributes.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h.inc" + +namespace cir { + +static bool isExternalLinkage(GlobalLinkageKind linkage) { + return linkage == GlobalLinkageKind::ExternalLinkage; +} +static bool isAvailableExternallyLinkage(GlobalLinkageKind linkage) { + return linkage == GlobalLinkageKind::AvailableExternallyLinkage; +} +static bool isLinkOnceAnyLinkage(GlobalLinkageKind linkage) { + return linkage == GlobalLinkageKind::LinkOnceAnyLinkage; +} +static bool isLinkOnceODRLinkage(GlobalLinkageKind linkage) { + return linkage == GlobalLinkageKind::LinkOnceODRLinkage; +} +static bool isLinkOnceLinkage(GlobalLinkageKind linkage) { + return isLinkOnceAnyLinkage(linkage) || isLinkOnceODRLinkage(linkage); +} +static bool isWeakAnyLinkage(GlobalLinkageKind linkage) { + return linkage == GlobalLinkageKind::WeakAnyLinkage; +} +static bool isWeakODRLinkage(GlobalLinkageKind linkage) { + return linkage == GlobalLinkageKind::WeakODRLinkage; +} +static bool isWeakLinkage(GlobalLinkageKind linkage) { + return isWeakAnyLinkage(linkage) || isWeakODRLinkage(linkage); +} +static bool isInternalLinkage(GlobalLinkageKind linkage) { + return linkage == GlobalLinkageKind::InternalLinkage; +} +static bool isPrivateLinkage(GlobalLinkageKind linkage) { + return linkage == GlobalLinkageKind::PrivateLinkage; +} +static bool isLocalLinkage(GlobalLinkageKind linkage) { + return isInternalLinkage(linkage) || isPrivateLinkage(linkage); +} +static bool isExternalWeakLinkage(GlobalLinkageKind linkage) { + return linkage == GlobalLinkageKind::ExternalWeakLinkage; +} +LLVM_ATTRIBUTE_UNUSED static bool isCommonLinkage(GlobalLinkageKind linkage) { + return linkage == GlobalLinkageKind::CommonLinkage; +} +LLVM_ATTRIBUTE_UNUSED static bool +isValidDeclarationLinkage(GlobalLinkageKind 
linkage) { + return isExternalWeakLinkage(linkage) || isExternalLinkage(linkage); +} + +/// Whether the definition of this global may be replaced by something +/// non-equivalent at link time. For example, if a function has weak linkage +/// then the code defining it may be replaced by different code. +LLVM_ATTRIBUTE_UNUSED static bool +isInterposableLinkage(GlobalLinkageKind linkage) { + switch (linkage) { + case GlobalLinkageKind::WeakAnyLinkage: + case GlobalLinkageKind::LinkOnceAnyLinkage: + case GlobalLinkageKind::CommonLinkage: + case GlobalLinkageKind::ExternalWeakLinkage: + return true; + + case GlobalLinkageKind::AvailableExternallyLinkage: + case GlobalLinkageKind::LinkOnceODRLinkage: + case GlobalLinkageKind::WeakODRLinkage: + // The above three cannot be overridden but can be de-refined. + + case GlobalLinkageKind::ExternalLinkage: + case GlobalLinkageKind::InternalLinkage: + case GlobalLinkageKind::PrivateLinkage: + return false; + } + llvm_unreachable("Fully covered switch above!"); +} + +/// Whether the definition of this global may be discarded if it is not used +/// in its compilation unit. +LLVM_ATTRIBUTE_UNUSED static bool +isDiscardableIfUnused(GlobalLinkageKind linkage) { + return isLinkOnceLinkage(linkage) || isLocalLinkage(linkage) || + isAvailableExternallyLinkage(linkage); +} + +/// Whether the definition of this global may be replaced at link time. NB: +/// Using this method outside of the code generators is almost always a +/// mistake: when working at the IR level use isInterposable instead as it +/// knows about ODR semantics. 
+LLVM_ATTRIBUTE_UNUSED static bool isWeakForLinker(GlobalLinkageKind linkage) { + return linkage == GlobalLinkageKind::WeakAnyLinkage || + linkage == GlobalLinkageKind::WeakODRLinkage || + linkage == GlobalLinkageKind::LinkOnceAnyLinkage || + linkage == GlobalLinkageKind::LinkOnceODRLinkage || + linkage == GlobalLinkageKind::CommonLinkage || + linkage == GlobalLinkageKind::ExternalWeakLinkage; +} + +LLVM_ATTRIBUTE_UNUSED static bool isValidLinkage(GlobalLinkageKind gl) { + return isExternalLinkage(gl) || isLocalLinkage(gl) || isWeakLinkage(gl) || + isLinkOnceLinkage(gl); +} + +} // namespace cir + +#endif // CLANG_CIR_DIALECT_IR_CIROPSENUMS_H diff --git a/clang/include/clang/CIR/Dialect/IR/CMakeLists.txt b/clang/include/clang/CIR/Dialect/IR/CMakeLists.txt index 1fdbc24ba6b4a..39292fb541daa 100644 --- a/clang/include/clang/CIR/Dialect/IR/CMakeLists.txt +++ b/clang/include/clang/CIR/Dialect/IR/CMakeLists.txt @@ -16,4 +16,6 @@ add_dependencies(mlir-headers MLIRCIROpsIncGen) mlir_tablegen(CIROpsAttributes.h.inc -gen-attrdef-decls) mlir_tablegen(CIROpsAttributes.cpp.inc -gen-attrdef-defs) -add_public_tablegen_target(MLIRCIRAttrsEnumsGen) +mlir_tablegen(CIROpsEnums.h.inc -gen-enum-decls) +mlir_tablegen(CIROpsEnums.cpp.inc -gen-enum-defs) +add_public_tablegen_target(MLIRCIREnumsGen) diff --git a/clang/include/clang/CIR/Interfaces/CIROpInterfaces.h b/clang/include/clang/CIR/Interfaces/CIROpInterfaces.h new file mode 100644 index 0000000000000..cb7488d3aee36 --- /dev/null +++ b/clang/include/clang/CIR/Interfaces/CIROpInterfaces.h @@ -0,0 +1,29 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the interface to CIR operations. 
+// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_CIR_INTERFACES_CIR_OP_H +#define CLANG_CIR_INTERFACES_CIR_OP_H + +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/Value.h" +#include "mlir/Interfaces/CallInterfaces.h" + +#include "clang/AST/Attr.h" +#include "clang/AST/DeclTemplate.h" +#include "clang/AST/Mangle.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" + +/// Include the generated interface declarations. +#include "clang/CIR/Interfaces/CIROpInterfaces.h.inc" + +#endif // CLANG_CIR_INTERFACES_CIR_OP_H diff --git a/clang/include/clang/CIR/Interfaces/CIROpInterfaces.td b/clang/include/clang/CIR/Interfaces/CIROpInterfaces.td new file mode 100644 index 0000000000000..39ef402c59e43 --- /dev/null +++ b/clang/include/clang/CIR/Interfaces/CIROpInterfaces.td @@ -0,0 +1,143 @@ +//===- CIROpInterfaces.td - CIR Op Interface Definitions --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the interface to CIR operations. 
+// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_CIR_INTERFACES_CIROPINTERFACES_TD +#define CLANG_CIR_INTERFACES_CIROPINTERFACES_TD + +include "mlir/IR/OpBase.td" +include "mlir/IR/SymbolInterfaces.td" + +let cppNamespace = "::cir" in { + def CIRGlobalValueInterface + : OpInterface<"CIRGlobalValueInterface", [Symbol]> { + + let methods = [ + InterfaceMethod<"", + "bool", "hasExternalLinkage", (ins), [{}], + /*defaultImplementation=*/[{ + return cir::isExternalLinkage($_op.getLinkage()); + }] + >, + InterfaceMethod<"", + "bool", "hasAvailableExternallyLinkage", (ins), [{}], + /*defaultImplementation=*/[{ + return cir::isAvailableExternallyLinkage($_op.getLinkage()); + }] + >, + InterfaceMethod<"", + "bool", "hasLinkOnceLinkage", (ins), [{}], + /*defaultImplementation=*/[{ + return cir::isLinkOnceLinkage($_op.getLinkage()); + }] + >, + InterfaceMethod<"", + "bool", "hasLinkOnceAnyLinkage", (ins), [{}], + /*defaultImplementation=*/[{ + return cir::isLinkOnceAnyLinkage($_op.getLinkage()); + }] + >, + InterfaceMethod<"", + "bool", "hasLinkOnceODRLinkage", (ins), [{}], + /*defaultImplementation=*/[{ + return cir::isLinkOnceODRLinkage($_op.getLinkage()); + }] + >, + InterfaceMethod<"", + "bool", "hasWeakLinkage", (ins), [{}], + /*defaultImplementation=*/[{ + return cir::isWeakLinkage($_op.getLinkage()); + }] + >, + InterfaceMethod<"", + "bool", "hasWeakAnyLinkage", (ins), [{}], + /*defaultImplementation=*/[{ + return cir::isWeakAnyLinkage($_op.getLinkage()); + }] + >, + InterfaceMethod<"", + "bool", "hasWeakODRLinkage", (ins), [{}], + /*defaultImplementation=*/[{ + return cir::isWeakODRLinkage($_op.getLinkage()); + }] + >, + InterfaceMethod<"", + "bool", "hasInternalLinkage", (ins), [{}], + /*defaultImplementation=*/[{ + return cir::isInternalLinkage($_op.getLinkage()); + }] + >, + InterfaceMethod<"", + "bool", "hasPrivateLinkage", (ins), [{}], + /*defaultImplementation=*/[{ + return 
cir::isPrivateLinkage($_op.getLinkage()); + }] + >, + InterfaceMethod<"", + "bool", "hasLocalLinkage", (ins), [{}], + /*defaultImplementation=*/[{ + return cir::isLocalLinkage($_op.getLinkage()); + }] + >, + InterfaceMethod<"", + "bool", "hasExternalWeakLinkage", (ins), [{}], + /*defaultImplementation=*/[{ + return cir::isExternalWeakLinkage($_op.getLinkage()); + }] + >, + InterfaceMethod<"", + "bool", "hasCommonLinkage", (ins), [{}], + /*defaultImplementation=*/[{ + return cir::isCommonLinkage($_op.getLinkage()); + }] + >, + InterfaceMethod<"", + "bool", "isDeclarationForLinker", (ins), [{}], + /*defaultImplementation=*/[{ + if ($_op.hasAvailableExternallyLinkage()) + return true; + return $_op.isDeclaration(); + }] + >, + InterfaceMethod<"", + "bool", "hasComdat", (ins), [{}], + /*defaultImplementation=*/[{ + return $_op.getComdat(); + }] + >, + InterfaceMethod<"", + "void", "setDSOLocal", (ins "bool":$val), [{}], + /*defaultImplementation=*/[{ + $_op.setDsolocal(val); + }] + >, + InterfaceMethod<"", + "bool", "isDSOLocal", (ins), [{}], + /*defaultImplementation=*/[{ + return $_op.getDsolocal(); + }] + >, + InterfaceMethod<"", + "bool", "isWeakForLinker", (ins), [{}], + /*defaultImplementation=*/[{ + return cir::isWeakForLinker($_op.getLinkage()); + }] + > + ]; + let extraClassDeclaration = [{ + bool hasDefaultVisibility(); + bool canBenefitFromLocalAlias(); + }]; + } + +} // namespace cir + +#endif // CLANG_CIR_INTERFACES_CIROPINTERFACES_TD diff --git a/clang/include/clang/CIR/Interfaces/CMakeLists.txt b/clang/include/clang/CIR/Interfaces/CMakeLists.txt index 1c90b6b5a23cb..e9929f6964605 100644 --- a/clang/include/clang/CIR/Interfaces/CMakeLists.txt +++ b/clang/include/clang/CIR/Interfaces/CMakeLists.txt @@ -3,6 +3,14 @@ # directory which is not the case for CIR (and also FIR, both have similar # workarounds). 
+function(add_clang_mlir_op_interface interface) + set(LLVM_TARGET_DEFINITIONS ${interface}.td) + mlir_tablegen(${interface}.h.inc -gen-op-interface-decls) + mlir_tablegen(${interface}.cpp.inc -gen-op-interface-defs) + add_public_tablegen_target(MLIR${interface}IncGen) + add_dependencies(mlir-generic-headers MLIR${interface}IncGen) +endfunction() + function(add_clang_mlir_type_interface interface) set(LLVM_TARGET_DEFINITIONS ${interface}.td) mlir_tablegen(${interface}.h.inc -gen-type-interface-decls) @@ -11,4 +19,5 @@ function(add_clang_mlir_type_interface interface) add_dependencies(mlir-generic-headers MLIR${interface}IncGen) endfunction() +add_clang_mlir_op_interface(CIROpInterfaces) add_clang_mlir_type_interface(CIRFPTypeInterface) diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 5c7e10d018809..6f845b7689e51 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -27,9 +27,6 @@ struct MissingFeatures { // Address space related static bool addressSpace() { return false; } - // This isn't needed until we add support for bools. 
- static bool convertTypeForMemory() { return false; } - // CIRGenFunction implementation details static bool cgfSymbolTable() { return false; } @@ -38,12 +35,19 @@ struct MissingFeatures { static bool opGlobalThreadLocal() { return false; } static bool opGlobalConstant() { return false; } static bool opGlobalAlignment() { return false; } - static bool opGlobalLinkage() { return false; } - // Load attributes + static bool supportIFuncAttr() { return false; } + static bool supportVisibility() { return false; } + static bool supportComdat() { return false; } + + // Load/store attributes static bool opLoadThreadLocal() { return false; } static bool opLoadEmitScalarRangeCheck() { return false; } static bool opLoadBooleanRepresentation() { return false; } + static bool opLoadStoreTbaa() { return false; } + static bool opLoadStoreMemOrder() { return false; } + static bool opLoadStoreVolatile() { return false; } + static bool opLoadStoreAlignment() { return false; } // AllocaOp handling static bool opAllocaVarDeclContext() { return false; } @@ -55,10 +59,23 @@ struct MissingFeatures { static bool opAllocaOpenMPThreadPrivate() { return false; } static bool opAllocaEscapeByReference() { return false; } static bool opAllocaReference() { return false; } + static bool opAllocaAnnotations() { return false; } + static bool opAllocaDynAllocSize() { return false; } + + // FuncOp handling + static bool opFuncOpenCLKernelMetadata() { return false; } + static bool opFuncCallingConv() { return false; } + static bool opFuncExtraAttrs() { return false; } + static bool opFuncDsolocal() { return false; } + static bool opFuncLinkage() { return false; } + static bool opFuncVisibility() { return false; } // Misc static bool scalarConversionOpts() { return false; } static bool tryEmitAsConstant() { return false; } + static bool constructABIArgDirectExtend() { return false; } + static bool opGlobalViewAttr() { return false; } + static bool lowerModeOptLevel() { return false; } }; } // 
namespace cir diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 883d6a969c258..d0414aba35209 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3176,7 +3176,7 @@ def modules_reduced_bmi : Flag<["-"], "fmodules-reduced-bmi">, HelpText<"Generate the reduced BMI">, MarshallingInfoFlag>; -def experimental_modules_reduced_bmi : Flag<["-"], "fexperimental-modules-reduced-bmi">, +def experimental_modules_reduced_bmi : Flag<["-"], "fexperimental-modules-reduced-bmi">, Group, Visibility<[ClangOption, CC1Option]>, Alias; def fmodules_embed_all_files : Joined<["-"], "fmodules-embed-all-files">, @@ -4729,7 +4729,7 @@ def mexecute_only : Flag<["-"], "mexecute-only">, Group, def mno_execute_only : Flag<["-"], "mno-execute-only">, Group, HelpText<"Allow generation of data access to code sections (ARM only)">; let Flags = [TargetSpecific] in { -def mtp_mode_EQ : Joined<["-"], "mtp=">, Group, Values<"soft,cp15,tpidrurw,tpidruro,tpidrprw,el0,el1,el2,el3,tpidr_el0,tpidr_el1,tpidr_el2,tpidr_el3,tpidrro_el0">, +def mtp_mode_EQ : Joined<["-"], "mtp=">, Group, Values<"soft,cp15,tpidrurw,tpidruro,tpidrprw,el0,el1,el2,el3,tpidr_el0,tpidr_el1,tpidr_el2,tpidr_el3,tpidrro_el0,auto">, HelpText<"Thread pointer access method. " "For AArch32: 'soft' uses a function call, or 'tpidrurw', 'tpidruro' or 'tpidrprw' use the three CP15 registers. 'cp15' is an alias for 'tpidruro'. " "For AArch64: 'tpidr_el0', 'tpidr_el1', 'tpidr_el2', 'tpidr_el3' or 'tpidrro_el0' use the five system registers. 
'elN' is an alias for 'tpidr_elN'.">; @@ -6777,8 +6777,16 @@ defm backtrace : BooleanFFlag<"backtrace">, Group; defm bounds_check : BooleanFFlag<"bounds-check">, Group; defm check_array_temporaries : BooleanFFlag<"check-array-temporaries">, Group; defm cray_pointer : BooleanFFlag<"cray-pointer">, Group; -defm d_lines_as_code : BooleanFFlag<"d-lines-as-code">, Group; -defm d_lines_as_comments : BooleanFFlag<"d-lines-as-comments">, Group; +defm d_lines_as_code : BooleanFFlag<"d-lines-as-code">, + HelpText<"Treat fixed form lines with 'd' or 'D' in the " + "first column as blank.">, + Group, + Visibility<[FlangOption, FC1Option]>; +defm d_lines_as_comments : BooleanFFlag<"d-lines-as-comments">, + HelpText<"Treat fixed form lines with 'd' or 'D' in " + "the first column as comments.">, + Group, + Visibility<[FlangOption, FC1Option]>; defm dollar_ok : BooleanFFlag<"dollar-ok">, Group; defm dump_fortran_optimized : BooleanFFlag<"dump-fortran-optimized">, Group; defm dump_fortran_original : BooleanFFlag<"dump-fortran-original">, Group; @@ -7431,7 +7439,7 @@ def fuse_register_sized_bitfield_access: Flag<["-"], "fuse-register-sized-bitfie def relaxed_aliasing : Flag<["-"], "relaxed-aliasing">, HelpText<"Turn off Type Based Alias Analysis">, MarshallingInfoFlag>; -defm pointer_tbaa: BoolOption<"", "pointer-tbaa", CodeGenOpts<"PointerTBAA">, +defm pointer_tbaa: BoolOption<"", "pointer-tbaa", CodeGenOpts<"PointerTBAA">, DefaultTrue, PosFlag, NegFlag, diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 335258d597028..049156e266c70 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -3730,6 +3730,11 @@ class Parser : public CodeCompletionHandler { return Out; } }; + struct OpenACCCacheParseInfo { + bool Failed = false; + SourceLocation ReadOnlyLoc; + SmallVector Vars; + }; /// Represents the 'error' state of parsing an OpenACC Clause, and stores /// whether we can continue parsing, or should give up 
on the directive. @@ -3752,13 +3757,15 @@ class Parser : public CodeCompletionHandler { /// Helper that parses an ID Expression based on the language options. ExprResult ParseOpenACCIDExpression(); /// Parses the variable list for the `cache` construct. - void ParseOpenACCCacheVarList(); + OpenACCCacheParseInfo ParseOpenACCCacheVarList(); using OpenACCVarParseResult = std::pair; /// Parses a single variable in a variable list for OpenACC. - OpenACCVarParseResult ParseOpenACCVar(OpenACCClauseKind CK); + OpenACCVarParseResult ParseOpenACCVar(OpenACCDirectiveKind DK, + OpenACCClauseKind CK); /// Parses the variable list for the variety of places that take a var-list. - llvm::SmallVector ParseOpenACCVarList(OpenACCClauseKind CK); + llvm::SmallVector ParseOpenACCVarList(OpenACCDirectiveKind DK, + OpenACCClauseKind CK); /// Parses any parameters for an OpenACC Clause, including required/optional /// parens. OpenACCClauseParseResult diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h index c03ec00d03dc5..6e08762dcc6d7 100644 --- a/clang/include/clang/Sema/Overload.h +++ b/clang/include/clang/Sema/Overload.h @@ -1265,11 +1265,11 @@ class Sema; }; - bool isBetterOverloadCandidate(Sema &S, - const OverloadCandidate &Cand1, + bool isBetterOverloadCandidate(Sema &S, const OverloadCandidate &Cand1, const OverloadCandidate &Cand2, SourceLocation Loc, - OverloadCandidateSet::CandidateSetKind Kind); + OverloadCandidateSet::CandidateSetKind Kind, + bool PartialOverloading = false); struct ConstructorInfo { DeclAccessPair FoundDecl; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index ebdbc69384efb..08621e633b55c 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -3168,6 +3168,13 @@ class Sema final : public SemaBase { DeclGroupPtrTy ConvertDeclToDeclGroup(Decl *Ptr, Decl *OwnedType = nullptr); + enum class DiagCtorKind { None, Implicit, Typename }; + /// Returns the TypeDeclType 
for the given type declaration, + /// as ASTContext::getTypeDeclType would, but + /// performs the required semantic checks for name lookup of said entity. + QualType getTypeDeclType(DeclContext *LookupCtx, DiagCtorKind DCK, + TypeDecl *TD, SourceLocation NameLoc); + /// If the identifier refers to a type name within this scope, /// return the declaration of that type. /// @@ -4392,11 +4399,11 @@ class Sema final : public SemaBase { // Whether the callee should be ignored in CUDA/HIP/OpenMP host/device check. bool shouldIgnoreInHostDeviceCheck(FunctionDecl *Callee); -private: /// Function or variable declarations to be checked for whether the deferred /// diagnostics should be emitted. llvm::SmallSetVector DeclsToCheckForDeferredDiags; +private: /// Map of current shadowing declarations to shadowed declarations. Warn if /// it looks like the user is trying to modify the shadowing declaration. llvm::DenseMap ShadowingDecls; @@ -11820,7 +11827,7 @@ class Sema final : public SemaBase { bool *ConstraintsNotSatisfied = nullptr); bool CheckTemplateTypeArgument( - TemplateTypeParmDecl *Param, TemplateArgumentLoc &Arg, + TemplateArgumentLoc &Arg, SmallVectorImpl &SugaredConverted, SmallVectorImpl &CanonicalConverted); @@ -11856,9 +11863,13 @@ class Sema final : public SemaBase { bool PartialOrdering, bool *StrictPackMatch); + /// Print the given named declaration to a string, + /// using the current PrintingPolicy, except that + /// TerseOutput will always be set. 
+ SmallString<128> toTerseString(const NamedDecl &D) const; + void NoteTemplateLocation(const NamedDecl &Decl, std::optional ParamRange = {}); - void NoteTemplateParameterLocation(const NamedDecl &Decl); /// Given a non-type template argument that refers to a /// declaration and the type of its corresponding non-type template @@ -11973,15 +11984,13 @@ class Sema final : public SemaBase { bool TemplateParameterListsAreEqual( const TemplateCompareNewDeclInfo &NewInstFrom, TemplateParameterList *New, const NamedDecl *OldInstFrom, TemplateParameterList *Old, bool Complain, - TemplateParameterListEqualKind Kind, - SourceLocation TemplateArgLoc = SourceLocation()); + TemplateParameterListEqualKind Kind); - bool TemplateParameterListsAreEqual( - TemplateParameterList *New, TemplateParameterList *Old, bool Complain, - TemplateParameterListEqualKind Kind, - SourceLocation TemplateArgLoc = SourceLocation()) { + bool TemplateParameterListsAreEqual(TemplateParameterList *New, + TemplateParameterList *Old, bool Complain, + TemplateParameterListEqualKind Kind) { return TemplateParameterListsAreEqual(nullptr, New, nullptr, Old, Complain, - Kind, TemplateArgLoc); + Kind); } /// Check whether a template can be declared within this scope. @@ -12617,7 +12626,8 @@ class Sema final : public SemaBase { FunctionTemplateDecl *getMoreSpecializedTemplate( FunctionTemplateDecl *FT1, FunctionTemplateDecl *FT2, SourceLocation Loc, TemplatePartialOrderingContext TPOC, unsigned NumCallArguments1, - QualType RawObj1Ty = {}, QualType RawObj2Ty = {}, bool Reversed = false); + QualType RawObj1Ty = {}, QualType RawObj2Ty = {}, bool Reversed = false, + bool PartialOverloading = false); /// Retrieve the most specialized of the given function template /// specializations. @@ -12860,6 +12870,11 @@ class Sema final : public SemaBase { /// We are performing partial ordering for template template parameters. 
PartialOrderingTTP, + + /// We are checking a template parameter, so for any diagnostics which + /// occur in this scope, we will add a context note which points to this + /// template parameter. + CheckTemplateParameter, } Kind; /// Was the enclosing context a non-instantiation SFINAE context? @@ -13087,6 +13102,11 @@ class Sema final : public SemaBase { PartialOrderingTTP, TemplateDecl *PArg, SourceRange InstantiationRange = SourceRange()); + struct CheckTemplateParameter {}; + /// \brief Note that we are checking a template parameter. + InstantiatingTemplate(Sema &SemaRef, CheckTemplateParameter, + NamedDecl *Param); + /// Note that we have finished instantiating this template. void Clear(); @@ -13120,6 +13140,13 @@ class Sema final : public SemaBase { InstantiatingTemplate &operator=(const InstantiatingTemplate &) = delete; }; + /// For any diagnostics which occur within its scope, adds a context note + /// pointing to the declaration of the template parameter. + struct CheckTemplateParameterRAII : InstantiatingTemplate { + CheckTemplateParameterRAII(Sema &S, NamedDecl *Param) + : InstantiatingTemplate(S, CheckTemplateParameter(), Param) {} + }; + bool SubstTemplateArgument(const TemplateArgumentLoc &Input, const MultiLevelTemplateArgumentList &TemplateArgs, TemplateArgumentLoc &Output, @@ -13428,6 +13455,10 @@ class Sema final : public SemaBase { bool ForCallExpr = false); ExprResult SubstExpr(Expr *E, const MultiLevelTemplateArgumentList &TemplateArgs); + /// Substitute an expression as if it is an address-of-operand, which makes it + /// act like a CXXIdExpression rather than an attempt to call. + ExprResult SubstCXXIdExpr(Expr *E, + const MultiLevelTemplateArgumentList &TemplateArgs); // A RAII type used by the TemplateDeclInstantiator and TemplateInstantiator // to disable constraint evaluation, then restore the state. 
@@ -13649,12 +13680,16 @@ class Sema final : public SemaBase { class LocalEagerInstantiationScope { public: - LocalEagerInstantiationScope(Sema &S) : S(S) { + LocalEagerInstantiationScope(Sema &S, bool AtEndOfTU) + : S(S), AtEndOfTU(AtEndOfTU) { SavedPendingLocalImplicitInstantiations.swap( S.PendingLocalImplicitInstantiations); } - void perform() { S.PerformPendingInstantiations(/*LocalOnly=*/true); } + void perform() { + S.PerformPendingInstantiations(/*LocalOnly=*/true, + /*AtEndOfTU=*/AtEndOfTU); + } ~LocalEagerInstantiationScope() { assert(S.PendingLocalImplicitInstantiations.empty() && @@ -13665,6 +13700,7 @@ class Sema final : public SemaBase { private: Sema &S; + bool AtEndOfTU; std::deque SavedPendingLocalImplicitInstantiations; }; @@ -13687,8 +13723,8 @@ class Sema final : public SemaBase { class GlobalEagerInstantiationScope { public: - GlobalEagerInstantiationScope(Sema &S, bool Enabled) - : S(S), Enabled(Enabled) { + GlobalEagerInstantiationScope(Sema &S, bool Enabled, bool AtEndOfTU) + : S(S), Enabled(Enabled), AtEndOfTU(AtEndOfTU) { if (!Enabled) return; @@ -13702,7 +13738,8 @@ class Sema final : public SemaBase { void perform() { if (Enabled) { S.DefineUsedVTables(); - S.PerformPendingInstantiations(); + S.PerformPendingInstantiations(/*LocalOnly=*/false, + /*AtEndOfTU=*/AtEndOfTU); } } @@ -13717,7 +13754,8 @@ class Sema final : public SemaBase { S.SavedVTableUses.pop_back(); // Restore the set of pending implicit instantiations. 
- if (S.TUKind != TU_Prefix || !S.LangOpts.PCHInstantiateTemplates) { + if ((S.TUKind != TU_Prefix || !S.LangOpts.PCHInstantiateTemplates) && + AtEndOfTU) { assert(S.PendingInstantiations.empty() && "PendingInstantiations should be empty before it is discarded."); S.PendingInstantiations.swap(S.SavedPendingInstantiations.back()); @@ -13736,6 +13774,7 @@ class Sema final : public SemaBase { private: Sema &S; bool Enabled; + bool AtEndOfTU; }; ExplicitSpecifier instantiateExplicitSpecifier( @@ -13921,7 +13960,8 @@ class Sema final : public SemaBase { /// Performs template instantiation for all implicit template /// instantiations we have seen until this point. - void PerformPendingInstantiations(bool LocalOnly = false); + void PerformPendingInstantiations(bool LocalOnly = false, + bool AtEndOfTU = true); TemplateParameterList * SubstTemplateParams(TemplateParameterList *Params, DeclContext *Owner, diff --git a/clang/include/clang/Sema/SemaBase.h b/clang/include/clang/Sema/SemaBase.h index 0b05245ab9686..463cae83c7e81 100644 --- a/clang/include/clang/Sema/SemaBase.h +++ b/clang/include/clang/Sema/SemaBase.h @@ -42,6 +42,7 @@ class SemaBase { ASTContext &getASTContext() const; DiagnosticsEngine &getDiagnostics() const; const LangOptions &getLangOpts() const; + DeclContext *getCurContext() const; /// Helper class that creates diagnostics with optional /// template instantiation stacks. 
diff --git a/clang/include/clang/Sema/SemaOpenACC.h b/clang/include/clang/Sema/SemaOpenACC.h index 3004b98760a98..748dcdd251a92 100644 --- a/clang/include/clang/Sema/SemaOpenACC.h +++ b/clang/include/clang/Sema/SemaOpenACC.h @@ -33,6 +33,9 @@ class IdentifierInfo; class OpenACCClause; class SemaOpenACC : public SemaBase { +public: + using DeclGroupPtrTy = OpaquePtr; + private: struct ComputeConstructInfo { /// Which type of compute construct we are inside of, which we can use to @@ -158,6 +161,18 @@ class SemaOpenACC : public SemaBase { /// Helper function for checking the 'for' and 'range for' stmts. void ForStmtBeginHelper(SourceLocation ForLoc, ForStmtBeginChecker &C); + // The 'declare' construct requires only a single reference among ALL declare + // directives in a context. We store existing references to check. Because the + // rules prevent referencing the same variable from multiple declaration + // contexts, we can just store the declaration and location of the reference. + llvm::DenseMap + DeclareVarReferences; + // The 'routine' construct disallows magic-statics in a function referred to + // by a 'routine' directive. So record any of these that we see so we can + // check them later. 
+ llvm::SmallDenseMap + MagicStaticLocs; + public: ComputeConstructInfo &getActiveComputeConstructInfo() { return ActiveComputeConstructInfo; @@ -411,6 +426,8 @@ class SemaOpenACC : public SemaBase { ClauseKind == OpenACCClauseKind::Reduction || ClauseKind == OpenACCClauseKind::Host || ClauseKind == OpenACCClauseKind::Device || + ClauseKind == OpenACCClauseKind::DeviceResident || + ClauseKind == OpenACCClauseKind::Link || (ClauseKind == OpenACCClauseKind::Self && DirKind == OpenACCDirectiveKind::Update) || ClauseKind == OpenACCClauseKind::FirstPrivate) && @@ -427,23 +444,10 @@ class SemaOpenACC : public SemaBase { } bool isReadOnly() const { - assert((ClauseKind == OpenACCClauseKind::CopyIn || - ClauseKind == OpenACCClauseKind::PCopyIn || - ClauseKind == OpenACCClauseKind::PresentOrCopyIn) && - "Only copyin accepts 'readonly:' tag"); return std::get(Details).IsReadOnly; } - bool isZero() const { - assert((ClauseKind == OpenACCClauseKind::CopyOut || - ClauseKind == OpenACCClauseKind::PCopyOut || - ClauseKind == OpenACCClauseKind::PresentOrCopyOut || - ClauseKind == OpenACCClauseKind::Create || - ClauseKind == OpenACCClauseKind::PCreate || - ClauseKind == OpenACCClauseKind::PresentOrCreate) && - "Only copyout/create accepts 'zero' tag"); - return std::get(Details).IsZero; - } + bool isZero() const { return std::get(Details).IsZero; } bool isForce() const { assert(ClauseKind == OpenACCClauseKind::Collapse && @@ -557,6 +561,8 @@ class SemaOpenACC : public SemaBase { ClauseKind == OpenACCClauseKind::DevicePtr || ClauseKind == OpenACCClauseKind::Host || ClauseKind == OpenACCClauseKind::Device || + ClauseKind == OpenACCClauseKind::DeviceResident || + ClauseKind == OpenACCClauseKind::Link || (ClauseKind == OpenACCClauseKind::Self && DirKind == OpenACCDirectiveKind::Update) || ClauseKind == OpenACCClauseKind::FirstPrivate) && @@ -600,6 +606,8 @@ class SemaOpenACC : public SemaBase { ClauseKind == OpenACCClauseKind::DevicePtr || ClauseKind == OpenACCClauseKind::Host || 
ClauseKind == OpenACCClauseKind::Device || + ClauseKind == OpenACCClauseKind::DeviceResident || + ClauseKind == OpenACCClauseKind::Link || (ClauseKind == OpenACCClauseKind::Self && DirKind == OpenACCDirectiveKind::Update) || ClauseKind == OpenACCClauseKind::FirstPrivate) && @@ -690,7 +698,8 @@ class SemaOpenACC : public SemaBase { /// parsing has consumed the 'annot_pragma_openacc_end' token. This DOES /// happen before any associated declarations or statements have been parsed. /// This function is only called when we are parsing a 'Decl' context. - bool ActOnStartDeclDirective(OpenACCDirectiveKind K, SourceLocation StartLoc); + bool ActOnStartDeclDirective(OpenACCDirectiveKind K, SourceLocation StartLoc, + ArrayRef Clauses); /// Called when we encounter an associated statement for our construct, this /// should check legality of the statement as it appertains to this Construct. StmtResult ActOnAssociatedStmt(SourceLocation DirectiveLoc, @@ -720,6 +729,7 @@ class SemaOpenACC : public SemaBase { /// MiscLoc: First misc location, if necessary (not all constructs). /// Exprs: List of expressions on the construct itself, if necessary (not all /// constructs). + /// FuncRef: used only for Routine, this is the function being referenced. /// AK: The atomic kind of the directive, if necessary (atomic only) /// RParenLoc: Location of the right paren, if it exists (not on all /// constructs). 
@@ -732,19 +742,12 @@ class SemaOpenACC : public SemaBase { OpenACCAtomicKind AK, SourceLocation RParenLoc, SourceLocation EndLoc, ArrayRef Clauses, StmtResult AssocStmt); - StmtResult ActOnEndStmtDirective( - OpenACCDirectiveKind K, SourceLocation StartLoc, SourceLocation DirLoc, - SourceLocation LParenLoc, SourceLocation MiscLoc, ArrayRef Exprs, - SourceLocation RParenLoc, SourceLocation EndLoc, - ArrayRef Clauses, StmtResult AssocStmt) { - return ActOnEndStmtDirective(K, StartLoc, DirLoc, LParenLoc, MiscLoc, Exprs, - OpenACCAtomicKind::None, RParenLoc, EndLoc, - Clauses, AssocStmt); - } - /// Called after the directive has been completely parsed, including the /// declaration group or associated statement. - DeclGroupRef ActOnEndDeclDirective(); + DeclGroupRef ActOnEndDeclDirective( + OpenACCDirectiveKind K, SourceLocation StartLoc, SourceLocation DirLoc, + SourceLocation LParenLoc, Expr *FuncRef, SourceLocation RParenLoc, + SourceLocation EndLoc, ArrayRef Clauses); /// Called when encountering an 'int-expr' for OpenACC, and manages /// conversions and diagnostics to 'int'. @@ -753,7 +756,23 @@ class SemaOpenACC : public SemaBase { /// Called when encountering a 'var' for OpenACC, ensures it is actually a /// declaration reference to a variable of the correct type. - ExprResult ActOnVar(OpenACCClauseKind CK, Expr *VarExpr); + ExprResult ActOnVar(OpenACCDirectiveKind DK, OpenACCClauseKind CK, + Expr *VarExpr); + /// Helper function called by ActOnVar that is used to check a 'cache' var. + ExprResult ActOnCacheVar(Expr *VarExpr); + /// Function called when a variable declarator is created, which lets us + /// implement the 'routine' 'function static variables' restriction. + void ActOnVariableDeclarator(VarDecl *VD); + + // Called after 'ActOnVar' specifically for a 'link' clause, which has to do + // some minor additional checks. 
+ llvm::SmallVector CheckLinkClauseVarList(ArrayRef VarExpr); + + // Checking for the arguments specific to the declare-clause that need to be + // checked during both phases of template translation. + bool CheckDeclareClause(SemaOpenACC::OpenACCParsedClause &Clause); + + ExprResult ActOnRoutineName(Expr *RoutineName); /// Called while semantically analyzing the reduction clause, ensuring the var /// is the correct kind of reference. diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index c998be34b9d89..5cb9998126a85 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1528,7 +1528,13 @@ enum DeclCode { // A decls specialization record. DECL_PARTIAL_SPECIALIZATIONS, - DECL_LAST = DECL_PARTIAL_SPECIALIZATIONS + // An OpenACCDeclareDecl record. + DECL_OPENACC_DECLARE, + + // An OpenACCRoutineDecl record. + DECL_OPENACC_ROUTINE, + + DECL_LAST = DECL_OPENACC_ROUTINE }; /// Record codes for each kind of statement or expression. @@ -2046,6 +2052,7 @@ enum StmtCode { STMT_OPENACC_SET_CONSTRUCT, STMT_OPENACC_UPDATE_CONSTRUCT, STMT_OPENACC_ATOMIC_CONSTRUCT, + STMT_OPENACC_CACHE_CONSTRUCT, // HLSL Constructs EXPR_HLSL_OUT_ARG, diff --git a/clang/include/clang/Serialization/ASTRecordReader.h b/clang/include/clang/Serialization/ASTRecordReader.h index 2561418b78ca7..ae7cd84fbc647 100644 --- a/clang/include/clang/Serialization/ASTRecordReader.h +++ b/clang/include/clang/Serialization/ASTRecordReader.h @@ -278,7 +278,8 @@ class ASTRecordReader /// Read an OpenACC clause, advancing Idx. OpenACCClause *readOpenACCClause(); - /// Read a list of OpenACC clauses into the passed SmallVector. + /// Read a list of OpenACC clauses into the passed SmallVector, during + /// statement reading. void readOpenACCClauseList(MutableArrayRef Clauses); /// Read a source location, advancing Idx. 
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td index 410f841630660..c8895db914d13 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -213,13 +213,6 @@ def DereferenceModeling : Checker<"DereferenceModeling">, def NullDereferenceChecker : Checker<"NullDereference">, HelpText<"Check for dereferences of null pointers">, - CheckerOptions<[ - CmdLineOption - ]>, Documentation, Dependencies<[DereferenceModeling]>; @@ -285,6 +278,12 @@ def FixedAddressChecker : Checker<"FixedAddr">, HelpText<"Check for assignment of a fixed address to a pointer">, Documentation; +def FixedAddressDereferenceChecker + : Checker<"FixedAddressDereference">, + HelpText<"Check for dereferences of fixed addresses">, + Documentation, + Dependencies<[DereferenceModeling]>; + def PointerArithChecker : Checker<"PointerArithm">, HelpText<"Check for pointer arithmetic on locations other than array " "elements">, diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def index b087ca8860690..2aa00db411844 100644 --- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def +++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def @@ -395,6 +395,19 @@ ANALYZER_OPTION( "flex\" won't be analyzed.", true) +ANALYZER_OPTION( + bool, ShouldSuppressAddressSpaceDereferences, "suppress-dereferences-from-any-address-space", + "The analyzer does not report dereferences on memory that use " + "address space #256, #257, and #258. Those address spaces are used when " + "dereferencing address spaces relative to the GS, FS, and SS segments on " + "x86/x86-64 targets. Dereferencing a null pointer in these address spaces " + "is not defined as an error. All other null dereferences in other address " + "spaces are defined as an error unless explicitly defined. 
" + "When this option is turned on, the special behavior of address spaces " + "#256, #257, #258 is extended to all pointers with address spaces and on " + "any target.", + true) + //===----------------------------------------------------------------------===// // Unsigned analyzer options. //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h index 9fd07ce47175c..804fc74b009df 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h @@ -498,6 +498,10 @@ class ExprEngine { void VisitInitListExpr(const InitListExpr *E, ExplodedNode *Pred, ExplodedNodeSet &Dst); + /// VisitAttributedStmt - Transfer function logic for AttributedStmt + void VisitAttributedStmt(const AttributedStmt *A, ExplodedNode *Pred, + ExplodedNodeSet &Dst); + /// VisitLogicalExpr - Transfer function logic for '&&', '||' void VisitLogicalExpr(const BinaryOperator* B, ExplodedNode *Pred, ExplodedNodeSet &Dst); diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h index f002f8645d3f6..816e122eb3003 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h @@ -67,7 +67,11 @@ enum class ScanningOptimizations { IgnoreCWD = (1 << 4), DSS_LAST_BITMASK_ENUM(IgnoreCWD), - Default = All + + // The build system needs to be aware that the current working + // directory is ignored. Without a good way of notifying the build + // system, it is less risky to default to off. 
+ Default = All & (~IgnoreCWD) }; #undef DSS_LAST_BITMASK_ENUM diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp index eaf0748395268..b74f67f0a9fed 100644 --- a/clang/lib/AST/ASTStructuralEquivalence.cpp +++ b/clang/lib/AST/ASTStructuralEquivalence.cpp @@ -66,6 +66,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclFriend.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclOpenACC.h" #include "clang/AST/DeclOpenMP.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/ExprCXX.h" diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 4cf6a48edd5e0..281fb7e14a57d 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -2843,6 +2843,8 @@ bool Compiler::VisitLambdaExpr(const LambdaExpr *E) { assert(Initializing); const Record *R = P.getOrCreateRecord(E->getLambdaClass()); + if (!R) + return false; auto *CaptureInitIt = E->capture_init_begin(); // Initialize all fields (which represent lambda captures) of the @@ -4087,9 +4089,8 @@ bool Compiler::visitZeroRecordInitializer(const Record *R, } else if (D->isRecord()) { if (!this->visitZeroRecordInitializer(D->ElemRecord, E)) return false; - } else { - assert(false); - } + } else + return false; if (!this->emitFinishInitPop(E)) return false; @@ -6039,14 +6040,12 @@ bool Compiler::VisitUnaryOperator(const UnaryOperator *E) { // We should already have a pointer when we get here. 
return this->delegate(SubExpr); case UO_Deref: // *x - if (DiscardResult) { - // assert(false); + if (DiscardResult) return this->discard(SubExpr); - } if (!this->visit(SubExpr)) return false; - if (classifyPrim(SubExpr) == PT_Ptr) + if (classifyPrim(SubExpr) == PT_Ptr && !E->getType()->isArrayType()) return this->emitNarrowPtr(E); return true; @@ -6326,7 +6325,7 @@ bool Compiler::visitDeclRef(const ValueDecl *D, const Expr *E) { if (auto It = Locals.find(D); It != Locals.end()) { const unsigned Offset = It->second.Offset; if (IsReference) - return this->emitGetLocal(PT_Ptr, Offset, E); + return this->emitGetLocal(classifyPrim(E), Offset, E); return this->emitGetPtrLocal(Offset, E); } else if (auto GlobalIndex = P.getGlobal(D)) { if (IsReference) { diff --git a/clang/lib/AST/ByteCode/Descriptor.cpp b/clang/lib/AST/ByteCode/Descriptor.cpp index 6017f6dd61cb3..0f862583a37b1 100644 --- a/clang/lib/AST/ByteCode/Descriptor.cpp +++ b/clang/lib/AST/ByteCode/Descriptor.cpp @@ -367,10 +367,12 @@ Descriptor::Descriptor(const DeclTy &D, PrimType Type, MetadataSize MD, } /// Arrays of composite elements. -Descriptor::Descriptor(const DeclTy &D, const Descriptor *Elem, MetadataSize MD, +Descriptor::Descriptor(const DeclTy &D, const Type *SourceTy, + const Descriptor *Elem, MetadataSize MD, unsigned NumElems, bool IsConst, bool IsTemporary, bool IsMutable) - : Source(D), ElemSize(Elem->getAllocSize() + sizeof(InlineDescriptor)), + : Source(D), SourceType(SourceTy), + ElemSize(Elem->getAllocSize() + sizeof(InlineDescriptor)), Size(ElemSize * NumElems), MDSize(MD.value_or(0)), AllocSize(std::max(alignof(void *), Size) + MDSize), ElemDesc(Elem), IsConst(IsConst), IsMutable(IsMutable), @@ -402,14 +404,16 @@ Descriptor::Descriptor(const DeclTy &D, const Record *R, MetadataSize MD, } /// Dummy. 
-Descriptor::Descriptor(const DeclTy &D) - : Source(D), ElemSize(1), Size(1), MDSize(0), AllocSize(MDSize), - ElemRecord(nullptr), IsConst(true), IsMutable(false), IsTemporary(false), - IsDummy(true) { +Descriptor::Descriptor(const DeclTy &D, MetadataSize MD) + : Source(D), ElemSize(1), Size(1), MDSize(MD.value_or(0)), + AllocSize(MDSize), ElemRecord(nullptr), IsConst(true), IsMutable(false), + IsTemporary(false), IsDummy(true) { assert(Source && "Missing source"); } QualType Descriptor::getType() const { + if (SourceType) + return QualType(SourceType, 0); if (const auto *D = asValueDecl()) return D->getType(); if (const auto *T = dyn_cast_if_present(asDecl())) diff --git a/clang/lib/AST/ByteCode/Descriptor.h b/clang/lib/AST/ByteCode/Descriptor.h index 01fa4b198de67..dfb008e4c8b8a 100644 --- a/clang/lib/AST/ByteCode/Descriptor.h +++ b/clang/lib/AST/ByteCode/Descriptor.h @@ -95,9 +95,9 @@ struct InlineDescriptor { /// Flag indicating if the field is the active member of a union. LLVM_PREFERRED_TYPE(bool) unsigned IsActive : 1; - /// Flat indicating if this field is in a union (even if nested). - unsigned InUnion : 1; + /// Flag indicating if this field is in a union (even if nested). LLVM_PREFERRED_TYPE(bool) + unsigned InUnion : 1; /// Flag indicating if the field is mutable (if in a record). LLVM_PREFERRED_TYPE(bool) unsigned IsFieldMutable : 1; @@ -124,6 +124,7 @@ struct Descriptor final { private: /// Original declaration, used to emit the error message. const DeclTy Source; + const Type *SourceType = nullptr; /// Size of an element, in host bytes. const unsigned ElemSize; /// Size of the storage, in host bytes. @@ -186,8 +187,9 @@ struct Descriptor final { bool IsTemporary, UnknownSize); /// Allocates a descriptor for an array of composites. 
- Descriptor(const DeclTy &D, const Descriptor *Elem, MetadataSize MD, - unsigned NumElems, bool IsConst, bool IsTemporary, bool IsMutable); + Descriptor(const DeclTy &D, const Type *SourceTy, const Descriptor *Elem, + MetadataSize MD, unsigned NumElems, bool IsConst, bool IsTemporary, + bool IsMutable); /// Allocates a descriptor for an array of composites of unknown size. Descriptor(const DeclTy &D, const Descriptor *Elem, MetadataSize MD, @@ -198,7 +200,7 @@ struct Descriptor final { bool IsTemporary, bool IsMutable); /// Allocates a dummy descriptor. - Descriptor(const DeclTy &D); + Descriptor(const DeclTy &D, MetadataSize MD = std::nullopt); /// Make this descriptor a dummy descriptor. void makeDummy() { IsDummy = true; } @@ -261,7 +263,7 @@ struct Descriptor final { bool isUnknownSizeArray() const { return Size == UnknownSizeMark; } /// Checks if the descriptor is of a primitive. - bool isPrimitive() const { return !IsArray && !ElemRecord; } + bool isPrimitive() const { return !IsArray && !ElemRecord && !IsDummy; } /// Checks if the descriptor is of an array. bool isArray() const { return IsArray; } diff --git a/clang/lib/AST/ByteCode/DynamicAllocator.cpp b/clang/lib/AST/ByteCode/DynamicAllocator.cpp index 3ef8c2e1f3e7c..728bd75d7d141 100644 --- a/clang/lib/AST/ByteCode/DynamicAllocator.cpp +++ b/clang/lib/AST/ByteCode/DynamicAllocator.cpp @@ -57,8 +57,10 @@ Block *DynamicAllocator::allocate(const Descriptor *ElementDesc, assert(ElementDesc->getMetadataSize() == 0); // Create a new descriptor for an array of the specified size and // element type. + // FIXME: Pass proper element type. 
const Descriptor *D = allocateDescriptor( - ElementDesc->asExpr(), ElementDesc, Descriptor::InlineDescMD, NumElements, + ElementDesc->asExpr(), nullptr, ElementDesc, Descriptor::InlineDescMD, + NumElements, /*IsConst=*/false, /*IsTemporary=*/false, /*IsMutable=*/false); return allocate(D, EvalID, AllocForm); } diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index 5e0d2e91fb1b2..1107c0c32792f 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -551,8 +551,8 @@ bool CheckInitialized(InterpState &S, CodePtr OpPC, const Pointer &Ptr, if (const auto *VD = Ptr.getDeclDesc()->asVarDecl(); VD && (VD->isConstexpr() || VD->hasGlobalStorage())) { - const SourceInfo &Loc = S.Current->getSource(OpPC); if (VD->getAnyInitializer()) { + const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_var_init_non_constant, 1) << VD; S.Note(VD->getLocation(), diag::note_declared_at); } else { @@ -714,6 +714,9 @@ bool CheckCallable(InterpState &S, CodePtr OpPC, const Function *F) { return false; } + if (S.checkingPotentialConstantExpression() && S.Current->getDepth() != 0) + return false; + if (F->isConstexpr() && F->hasBody() && (F->getDecl()->isConstexpr() || F->getDecl()->hasAttr())) return true; @@ -722,7 +725,6 @@ bool CheckCallable(InterpState &S, CodePtr OpPC, const Function *F) { if (F->isLambdaStaticInvoker()) return true; - const SourceLocation &Loc = S.Current->getLocation(OpPC); if (S.getLangOpts().CPlusPlus11) { const FunctionDecl *DiagDecl = F->getDecl(); @@ -748,7 +750,8 @@ bool CheckCallable(InterpState &S, CodePtr OpPC, const Function *F) { // or an inheriting constructor, we should be much more explicit about why // it's not constexpr. 
if (CD && CD->isInheritingConstructor()) { - S.FFDiag(Loc, diag::note_constexpr_invalid_inhctor, 1) + S.FFDiag(S.Current->getLocation(OpPC), + diag::note_constexpr_invalid_inhctor, 1) << CD->getInheritedConstructor().getConstructor()->getParent(); S.Note(DiagDecl->getLocation(), diag::note_declared_at); } else { @@ -766,7 +769,8 @@ bool CheckCallable(InterpState &S, CodePtr OpPC, const Function *F) { DiagDecl->hasBody()) return false; - S.FFDiag(Loc, diag::note_constexpr_invalid_function, 1) + S.FFDiag(S.Current->getLocation(OpPC), + diag::note_constexpr_invalid_function, 1) << DiagDecl->isConstexpr() << (bool)CD << DiagDecl; if (DiagDecl->getDefinition()) @@ -776,7 +780,8 @@ bool CheckCallable(InterpState &S, CodePtr OpPC, const Function *F) { S.Note(DiagDecl->getLocation(), diag::note_declared_at); } } else { - S.FFDiag(Loc, diag::note_invalid_subexpr_in_const_expr); + S.FFDiag(S.Current->getLocation(OpPC), + diag::note_invalid_subexpr_in_const_expr); } return false; @@ -980,11 +985,6 @@ bool CheckNonNullArgs(InterpState &S, CodePtr OpPC, const Function *F, return true; } -// FIXME: This is similar to code we already have in Compiler.cpp. -// I think it makes sense to instead add the field and base destruction stuff -// to the destructor Function itself. Then destroying a record would really -// _just_ be calling its destructor. That would also help with the diagnostic -// difference when the destructor or a field/base fails. 
static bool runRecordDestructor(InterpState &S, CodePtr OpPC, const Pointer &BasePtr, const Descriptor *Desc) { @@ -992,7 +992,8 @@ static bool runRecordDestructor(InterpState &S, CodePtr OpPC, const Record *R = Desc->ElemRecord; assert(R); - if (Pointer::pointToSameBlock(BasePtr, S.Current->getThis())) { + if (Pointer::pointToSameBlock(BasePtr, S.Current->getThis()) && + S.Current->getFunction()->isDestructor()) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_double_destroy); return false; @@ -1094,8 +1095,8 @@ bool Free(InterpState &S, CodePtr OpPC, bool DeleteIsArrayForm, // For a class type with a virtual destructor, the selected operator delete // is the one looked up when building the destructor. - QualType AllocType = Ptr.getType(); if (!DeleteIsArrayForm && !IsGlobalDelete) { + QualType AllocType = Ptr.getType(); auto getVirtualOperatorDelete = [](QualType T) -> const FunctionDecl * { if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl()) if (const CXXDestructorDecl *DD = RD->getDestructor()) @@ -1343,6 +1344,9 @@ bool Call(InterpState &S, CodePtr OpPC, const Function *Func, } else { if (!CheckInvoke(S, OpPC, ThisPtr)) return cleanup(); + if (!Func->isConstructor() && + !CheckActive(S, OpPC, ThisPtr, AK_MemberCall)) + return false; } if (Func->isConstructor() && !checkConstructor(S, OpPC, Func, ThisPtr)) @@ -1514,7 +1518,7 @@ bool CallPtr(InterpState &S, CodePtr OpPC, uint32_t ArgSize, // This happens when the call expression has been cast to // something else, but we don't support that. if (S.Ctx.classify(F->getDecl()->getReturnType()) != - S.Ctx.classify(CE->getType())) + S.Ctx.classify(CE->getCallReturnType(S.getASTContext()))) return false; // Check argument nullability state. 
diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index db35208a02941..d8f90e45b0ced 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -1006,6 +1006,14 @@ inline bool CmpHelper(InterpState &S, CodePtr OpPC, CompareFn Fn) { } } +static inline bool IsOpaqueConstantCall(const CallExpr *E) { + unsigned Builtin = E->getBuiltinCallee(); + return (Builtin == Builtin::BI__builtin___CFStringMakeConstantString || + Builtin == Builtin::BI__builtin___NSStringMakeConstantString || + Builtin == Builtin::BI__builtin_ptrauth_sign_constant || + Builtin == Builtin::BI__builtin_function_start); +} + template <> inline bool CmpHelperEQ(InterpState &S, CodePtr OpPC, CompareFn Fn) { using BoolT = PrimConv::T; @@ -1066,9 +1074,18 @@ inline bool CmpHelperEQ(InterpState &S, CodePtr OpPC, CompareFn Fn) { if (P.isZero()) continue; if (BothNonNull && P.pointsToLiteral()) { - const SourceInfo &Loc = S.Current->getSource(OpPC); - S.FFDiag(Loc, diag::note_constexpr_literal_comparison); - return false; + const Expr *E = P.getDeclDesc()->asExpr(); + if (isa(E)) { + const SourceInfo &Loc = S.Current->getSource(OpPC); + S.FFDiag(Loc, diag::note_constexpr_literal_comparison); + return false; + } else if (const auto *CE = dyn_cast(E); + CE && IsOpaqueConstantCall(CE)) { + const SourceInfo &Loc = S.Current->getSource(OpPC); + S.FFDiag(Loc, diag::note_constexpr_opaque_call_comparison) + << P.toDiagnosticString(S.getASTContext()); + return false; + } } } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index b964906fb6594..df9c2bc24b15f 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -397,21 +397,10 @@ static bool interp__builtin_fmin(InterpState &S, CodePtr OpPC, const Floating &LHS = getParam(Frame, 0); const Floating &RHS = getParam(Frame, 1); - Floating Result; - - if (IsNumBuiltin) { - Result = llvm::minimumnum(LHS.getAPFloat(), 
RHS.getAPFloat()); - } else { - // When comparing zeroes, return -0.0 if one of the zeroes is negative. - if (LHS.isZero() && RHS.isZero() && RHS.isNegative()) - Result = RHS; - else if (LHS.isNan() || RHS < LHS) - Result = RHS; - else - Result = LHS; - } - - S.Stk.push(Result); + if (IsNumBuiltin) + S.Stk.push(llvm::minimumnum(LHS.getAPFloat(), RHS.getAPFloat())); + else + S.Stk.push(minnum(LHS.getAPFloat(), RHS.getAPFloat())); return true; } @@ -421,21 +410,10 @@ static bool interp__builtin_fmax(InterpState &S, CodePtr OpPC, const Floating &LHS = getParam(Frame, 0); const Floating &RHS = getParam(Frame, 1); - Floating Result; - - if (IsNumBuiltin) { - Result = llvm::maximumnum(LHS.getAPFloat(), RHS.getAPFloat()); - } else { - // When comparing zeroes, return +0.0 if one of the zeroes is positive. - if (LHS.isZero() && RHS.isZero() && LHS.isNegative()) - Result = RHS; - else if (LHS.isNan() || RHS > LHS) - Result = RHS; - else - Result = LHS; - } - - S.Stk.push(Result); + if (IsNumBuiltin) + S.Stk.push(llvm::maximumnum(LHS.getAPFloat(), RHS.getAPFloat())); + else + S.Stk.push(maxnum(LHS.getAPFloat(), RHS.getAPFloat())); return true; } diff --git a/clang/lib/AST/ByteCode/Pointer.cpp b/clang/lib/AST/ByteCode/Pointer.cpp index 92cfa192fd385..8abdc54b64677 100644 --- a/clang/lib/AST/ByteCode/Pointer.cpp +++ b/clang/lib/AST/ByteCode/Pointer.cpp @@ -210,7 +210,8 @@ APValue Pointer::toAPValue(const ASTContext &ASTCtx) const { }; bool UsePath = true; - if (getType()->isLValueReferenceType()) + if (const ValueDecl *VD = getDeclDesc()->asValueDecl(); + VD && VD->getType()->isLValueReferenceType()) UsePath = false; // Build the path into the object. @@ -248,7 +249,12 @@ APValue Pointer::toAPValue(const ASTContext &ASTCtx) const { Index = Ptr.getIndex(); QualType ElemType = Desc->getElemQualType(); - Offset += (Index * ASTCtx.getTypeSizeInChars(ElemType)); + if (const auto *RD = ElemType->getAsRecordDecl(); + RD && !RD->getDefinition()) { + // Ignore this for the offset. 
+ } else { + Offset += (Index * ASTCtx.getTypeSizeInChars(ElemType)); + } if (Ptr.getArray().getType()->isArrayType()) Path.push_back(APValue::LValuePathEntry::ArrayIndex(Index)); Ptr = Ptr.getArray(); @@ -384,7 +390,6 @@ void Pointer::initialize() const { return; assert(PointeeStorage.BS.Pointee && "Cannot initialize null pointer"); - const Descriptor *Desc = getFieldDesc(); if (isRoot() && PointeeStorage.BS.Base == sizeof(GlobalInlineDescriptor)) { GlobalInlineDescriptor &GD = *reinterpret_cast( @@ -393,6 +398,7 @@ void Pointer::initialize() const { return; } + const Descriptor *Desc = getFieldDesc(); assert(Desc); if (Desc->isPrimitiveArray()) { // Primitive global arrays don't have an initmap. diff --git a/clang/lib/AST/ByteCode/Program.cpp b/clang/lib/AST/ByteCode/Program.cpp index 0754e259b7cb3..c33d7fd7a2dc5 100644 --- a/clang/lib/AST/ByteCode/Program.cpp +++ b/clang/lib/AST/ByteCode/Program.cpp @@ -393,6 +393,7 @@ Descriptor *Program::createDescriptor(const DeclTy &D, const Type *Ty, if (const auto *Record = getOrCreateRecord(RT->getDecl())) return allocateDescriptor(D, Record, MDSize, IsConst, IsTemporary, IsMutable); + return allocateDescriptor(D, MDSize); } // Arrays. 
@@ -419,7 +420,7 @@ Descriptor *Program::createDescriptor(const DeclTy &D, const Type *Ty, unsigned ElemSize = ElemDesc->getAllocSize() + sizeof(InlineDescriptor); if (std::numeric_limits::max() / ElemSize <= NumElems) return {}; - return allocateDescriptor(D, ElemDesc, MDSize, NumElems, IsConst, + return allocateDescriptor(D, Ty, ElemDesc, MDSize, NumElems, IsConst, IsTemporary, IsMutable); } } diff --git a/clang/lib/AST/CMakeLists.txt b/clang/lib/AST/CMakeLists.txt index cb13c5225b713..f6056e3935a63 100644 --- a/clang/lib/AST/CMakeLists.txt +++ b/clang/lib/AST/CMakeLists.txt @@ -50,6 +50,7 @@ add_clang_library(clangAST DeclFriend.cpp DeclGroup.cpp DeclObjC.cpp + DeclOpenACC.cpp DeclOpenMP.cpp DeclPrinter.cpp DeclTemplate.cpp diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp index ab9d4869a74ee..6260b92733ab7 100644 --- a/clang/lib/AST/DeclBase.cpp +++ b/clang/lib/AST/DeclBase.cpp @@ -21,6 +21,7 @@ #include "clang/AST/DeclContextInternals.h" #include "clang/AST/DeclFriend.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclOpenACC.h" #include "clang/AST/DeclOpenMP.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/DependentDiagnostic.h" @@ -992,6 +993,8 @@ unsigned Decl::getIdentifierNamespaceForKind(Kind DeclKind) { case LifetimeExtendedTemporary: case RequiresExprBody: case ImplicitConceptSpecialization: + case OpenACCDeclare: + case OpenACCRoutine: // Never looked up by name. return 0; } diff --git a/clang/lib/AST/DeclOpenACC.cpp b/clang/lib/AST/DeclOpenACC.cpp new file mode 100644 index 0000000000000..f1e6770d83187 --- /dev/null +++ b/clang/lib/AST/DeclOpenACC.cpp @@ -0,0 +1,52 @@ +//===--- DeclOpenACC.cpp - Classes for OpenACC Constructs -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the subclasses of Decl class declared in Decl.h +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/DeclOpenACC.h" +#include "clang/AST/ASTContext.h" + +using namespace clang; + +OpenACCDeclareDecl * +OpenACCDeclareDecl::Create(ASTContext &Ctx, DeclContext *DC, + SourceLocation StartLoc, SourceLocation DirLoc, + SourceLocation EndLoc, + ArrayRef Clauses) { + return new (Ctx, DC, + additionalSizeToAlloc(Clauses.size())) + OpenACCDeclareDecl(DC, StartLoc, DirLoc, EndLoc, Clauses); +} + +OpenACCDeclareDecl * +OpenACCDeclareDecl::CreateDeserialized(ASTContext &Ctx, GlobalDeclID ID, + unsigned NumClauses) { + return new (Ctx, ID, additionalSizeToAlloc(NumClauses)) + OpenACCDeclareDecl(NumClauses); +} + +OpenACCRoutineDecl * +OpenACCRoutineDecl::Create(ASTContext &Ctx, DeclContext *DC, + SourceLocation StartLoc, SourceLocation DirLoc, + SourceLocation LParenLoc, Expr *FuncRef, + SourceLocation RParenLoc, SourceLocation EndLoc, + ArrayRef Clauses) { + return new (Ctx, DC, + additionalSizeToAlloc(Clauses.size())) + OpenACCRoutineDecl(DC, StartLoc, DirLoc, LParenLoc, FuncRef, RParenLoc, + EndLoc, Clauses); +} + +OpenACCRoutineDecl * +OpenACCRoutineDecl::CreateDeserialized(ASTContext &Ctx, GlobalDeclID ID, + unsigned NumClauses) { + return new (Ctx, ID, additionalSizeToAlloc(NumClauses)) + OpenACCRoutineDecl(NumClauses); +} diff --git a/clang/lib/AST/DeclPrinter.cpp b/clang/lib/AST/DeclPrinter.cpp index 0d51fdbc7e126..0b59fa5aec1be 100644 --- a/clang/lib/AST/DeclPrinter.cpp +++ b/clang/lib/AST/DeclPrinter.cpp @@ -112,6 +112,9 @@ namespace { void VisitNonTypeTemplateParmDecl(const NonTypeTemplateParmDecl *NTTP); void VisitHLSLBufferDecl(HLSLBufferDecl *D); + void VisitOpenACCDeclareDecl(OpenACCDeclareDecl *D); + void 
VisitOpenACCRoutineDecl(OpenACCRoutineDecl *D); + void printTemplateParameters(const TemplateParameterList *Params, bool OmitTemplateKW = false); void printTemplateArguments(llvm::ArrayRef Args, @@ -495,6 +498,8 @@ void DeclPrinter::VisitDeclContext(DeclContext *DC, bool Indent) { isa(*D) || isa(*D) || isa(*D)) Terminator = nullptr; + else if (isa(*D)) + Terminator = nullptr; else if (isa(*D) && cast(*D)->hasBody()) Terminator = nullptr; else if (auto FD = dyn_cast(*D)) { @@ -1910,3 +1915,39 @@ void DeclPrinter::VisitNonTypeTemplateParmDecl( /*IncludeType=*/false); } } + +void DeclPrinter::VisitOpenACCDeclareDecl(OpenACCDeclareDecl *D) { + if (!D->isInvalidDecl()) { + Out << "#pragma acc declare"; + if (!D->clauses().empty()) { + Out << ' '; + OpenACCClausePrinter Printer(Out, Policy); + Printer.VisitClauseList(D->clauses()); + } + } +} +void DeclPrinter::VisitOpenACCRoutineDecl(OpenACCRoutineDecl *D) { + if (!D->isInvalidDecl()) { + Out << "#pragma acc routine"; + + if (D->hasNameSpecified()) { + Out << "("; + + // The referenced function was named here, but this makes us tolerant of + // errors. 
+ if (D->getFunctionReference()) + D->getFunctionReference()->printPretty(Out, nullptr, Policy, + Indentation, "\n", &Context); + else + Out << ""; + + Out << ")"; + } + + if (!D->clauses().empty()) { + Out << ' '; + OpenACCClausePrinter Printer(Out, Policy); + Printer.VisitClauseList(D->clauses()); + } + } +} diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index b747aa8df807d..ccfec7fda0cbc 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -747,7 +747,7 @@ std::string PredefinedExpr::ComputeName(PredefinedIdentKind IK, if (const CXXMethodDecl *MD = dyn_cast(FD)) { if (MD->isVirtual() && IK != PredefinedIdentKind::PrettyFunctionNoVirtual) Out << "virtual "; - if (MD->isStatic()) + if (MD->isStatic() && !ForceElaboratedPrinting) Out << "static "; } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index ef1d763009453..d9a1e5bb42343 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -3628,8 +3628,6 @@ static bool evaluateVarDeclInit(EvalInfo &Info, const Expr *E, if (AllowConstexprUnknown) { if (!Result) Result = &Info.CurrentCall->createConstexprUnknownAPValues(VD, Base); - else - Result->setConstexprUnknown(); } return true; } @@ -15604,16 +15602,11 @@ bool FloatExprEvaluator::VisitCallExpr(const CallExpr *E) { case Builtin::BI__builtin_fmaxl: case Builtin::BI__builtin_fmaxf16: case Builtin::BI__builtin_fmaxf128: { - // TODO: Handle sNaN. APFloat RHS(0.); if (!EvaluateFloat(E->getArg(0), Result, Info) || !EvaluateFloat(E->getArg(1), RHS, Info)) return false; - // When comparing zeroes, return +0.0 if one of the zeroes is positive. 
- if (Result.isZero() && RHS.isZero() && Result.isNegative()) - Result = RHS; - else if (Result.isNaN() || RHS > Result) - Result = RHS; + Result = maxnum(Result, RHS); return true; } @@ -15622,16 +15615,11 @@ bool FloatExprEvaluator::VisitCallExpr(const CallExpr *E) { case Builtin::BI__builtin_fminl: case Builtin::BI__builtin_fminf16: case Builtin::BI__builtin_fminf128: { - // TODO: Handle sNaN. APFloat RHS(0.); if (!EvaluateFloat(E->getArg(0), Result, Info) || !EvaluateFloat(E->getArg(1), RHS, Info)) return false; - // When comparing zeroes, return -0.0 if one of the zeroes is negative. - if (Result.isZero() && RHS.isZero() && RHS.isNegative()) - Result = RHS; - else if (Result.isNaN() || RHS < Result) - Result = RHS; + Result = minnum(Result, RHS); return true; } @@ -17005,6 +16993,18 @@ bool Expr::EvaluateAsInitializer(APValue &Value, const ASTContext &Ctx, if (!Info.discardCleanups()) llvm_unreachable("Unhandled cleanup; missing full expression marker?"); + + if (Value.allowConstexprUnknown()) { + assert(Value.isLValue() && "Expected an lvalue"); + auto Base = Value.getLValueBase(); + const auto *NewVD = Base.dyn_cast(); + if (!NewVD) + NewVD = VD; + Info.FFDiag(getExprLoc(), diag::note_constexpr_var_init_non_constant, 1) + << NewVD; + NoteLValueLocation(Info, Base); + return false; + } } return CheckConstantExpression(Info, DeclLoc, DeclTy, Value, diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 4a090118c3d7b..b80dd1c86092f 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -3489,6 +3489,20 @@ StringRef CXXNameMangler::getCallingConvQualifierName(CallingConv CC) { case CC_M68kRTD: case CC_PreserveNone: case CC_RISCVVectorCall: +#define CC_VLS_CASE(ABI_VLEN) case CC_RISCVVLSCall_##ABI_VLEN: + CC_VLS_CASE(32) + CC_VLS_CASE(64) + CC_VLS_CASE(128) + CC_VLS_CASE(256) + CC_VLS_CASE(512) + CC_VLS_CASE(1024) + CC_VLS_CASE(2048) + CC_VLS_CASE(4096) + CC_VLS_CASE(8192) + CC_VLS_CASE(16384) + 
CC_VLS_CASE(32768) + CC_VLS_CASE(65536) +#undef CC_VLS_CASE // FIXME: we should be mangling all of the above. return ""; @@ -6000,6 +6014,8 @@ void CXXNameMangler::mangleCXXDtorType(CXXDtorType T) { case Dtor_Comdat: Out << "D5"; break; + case Dtor_VectorDeleting: + llvm_unreachable("Itanium ABI does not use vector deleting dtors"); } } diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp index 27fd214dcee3b..e5e7bd31f73e9 100644 --- a/clang/lib/AST/JSONNodeDumper.cpp +++ b/clang/lib/AST/JSONNodeDumper.cpp @@ -1362,6 +1362,9 @@ void JSONNodeDumper::VisitSYCLUniqueStableNameExpr( void JSONNodeDumper::VisitOpenACCAsteriskSizeExpr( const OpenACCAsteriskSizeExpr *E) {} +void JSONNodeDumper::VisitOpenACCDeclareDecl(const OpenACCDeclareDecl *D) {} +void JSONNodeDumper::VisitOpenACCRoutineDecl(const OpenACCRoutineDecl *D) {} + void JSONNodeDumper::VisitPredefinedExpr(const PredefinedExpr *PE) { JOS.attribute("name", PredefinedExpr::getIdentKindName(PE->getIdentKind())); } diff --git a/clang/lib/AST/Mangle.cpp b/clang/lib/AST/Mangle.cpp index 15be9c62bf888..4c4f2038c51e6 100644 --- a/clang/lib/AST/Mangle.cpp +++ b/clang/lib/AST/Mangle.cpp @@ -74,7 +74,7 @@ static CCMangling getCallingConvMangling(const ASTContext &Context, if (FD->isMain() && FD->getNumParams() == 2) return CCM_WasmMainArgcArgv; - if (!Triple.isOSWindows() || !Triple.isX86()) + if (!TI.shouldUseMicrosoftCCforMangling()) return CCM_Other; if (Context.getLangOpts().CPlusPlus && !isExternC(ND) && diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index fe34251688a98..6e2be2557d0a7 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -1484,8 +1484,9 @@ void MicrosoftCXXNameMangler::mangleCXXDtorType(CXXDtorType T) { // ::= ?_G # scalar deleting destructor case Dtor_Deleting: Out << "?_G"; return; // ::= ?_E # vector deleting destructor - // FIXME: Add a vector deleting dtor type. 
It goes in the vtable, so we need - // it. + case Dtor_VectorDeleting: + Out << "?_E"; + return; case Dtor_Comdat: llvm_unreachable("not expecting a COMDAT"); } @@ -2886,9 +2887,12 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T, // ::= @ # structors (they have no declared return type) if (IsStructor) { if (isa(D) && isStructorDecl(D)) { - // The scalar deleting destructor takes an extra int argument which is not - // reflected in the AST. - if (StructorType == Dtor_Deleting) { + // The deleting destructors take an extra argument of type int that + // indicates whether the storage for the object should be deleted and + // whether a single object or an array of objects is being destroyed. This + // extra argument is not reflected in the AST. + if (StructorType == Dtor_Deleting || + StructorType == Dtor_VectorDeleting) { Out << (PointersAre64Bit ? "PEAXI@Z" : "PAXI@Z"); return; } @@ -3861,10 +3865,10 @@ void MicrosoftMangleContextImpl::mangleCXXDtorThunk(const CXXDestructorDecl *DD, const ThunkInfo &Thunk, bool /*ElideOverrideInfo*/, raw_ostream &Out) { - // FIXME: Actually, the dtor thunk should be emitted for vector deleting - // dtors rather than scalar deleting dtors. Just use the vector deleting dtor - // mangling manually until we support both deleting dtor types. - assert(Type == Dtor_Deleting); + // The dtor thunk should use vector deleting dtor mangling, however as an + // optimization we may end up emitting only scalar deleting dtor body, so just + // use the vector deleting dtor mangling manually. 
+ assert(Type == Dtor_Deleting || Type == Dtor_VectorDeleting); msvc_hashing_ostream MHO(Out); MicrosoftCXXNameMangler Mangler(*this, MHO, DD, Type); Mangler.getStream() << "??_E"; diff --git a/clang/lib/AST/OpenACCClause.cpp b/clang/lib/AST/OpenACCClause.cpp index aa14ab902ba66..fd2c38a0e64e7 100644 --- a/clang/lib/AST/OpenACCClause.cpp +++ b/clang/lib/AST/OpenACCClause.cpp @@ -40,6 +40,8 @@ bool OpenACCClauseWithVarList::classof(const OpenACCClause *C) { OpenACCCopyInClause::classof(C) || OpenACCCopyOutClause::classof(C) || OpenACCReductionClause::classof(C) || OpenACCCreateClause::classof(C) || OpenACCDeviceClause::classof(C) || + OpenACCLinkClause::classof(C) || + OpenACCDeviceResidentClause::classof(C) || OpenACCHostClause::classof(C); } bool OpenACCClauseWithCondition::classof(const OpenACCClause *C) { @@ -438,6 +440,25 @@ OpenACCCopyClause::Create(const ASTContext &C, OpenACCClauseKind Spelling, OpenACCCopyClause(Spelling, BeginLoc, LParenLoc, VarList, EndLoc); } +OpenACCLinkClause *OpenACCLinkClause::Create(const ASTContext &C, + SourceLocation BeginLoc, + SourceLocation LParenLoc, + ArrayRef VarList, + SourceLocation EndLoc) { + void *Mem = + C.Allocate(OpenACCLinkClause::totalSizeToAlloc(VarList.size())); + return new (Mem) OpenACCLinkClause(BeginLoc, LParenLoc, VarList, EndLoc); +} + +OpenACCDeviceResidentClause *OpenACCDeviceResidentClause::Create( + const ASTContext &C, SourceLocation BeginLoc, SourceLocation LParenLoc, + ArrayRef VarList, SourceLocation EndLoc) { + void *Mem = C.Allocate( + OpenACCDeviceResidentClause::totalSizeToAlloc(VarList.size())); + return new (Mem) + OpenACCDeviceResidentClause(BeginLoc, LParenLoc, VarList, EndLoc); +} + OpenACCCopyInClause * OpenACCCopyInClause::Create(const ASTContext &C, OpenACCClauseKind Spelling, SourceLocation BeginLoc, SourceLocation LParenLoc, @@ -513,6 +534,13 @@ OpenACCSeqClause *OpenACCSeqClause::Create(const ASTContext &C, return new (Mem) OpenACCSeqClause(BeginLoc, EndLoc); } +OpenACCNoHostClause 
*OpenACCNoHostClause::Create(const ASTContext &C, + SourceLocation BeginLoc, + SourceLocation EndLoc) { + void *Mem = C.Allocate(sizeof(OpenACCNoHostClause)); + return new (Mem) OpenACCNoHostClause(BeginLoc, EndLoc); +} + OpenACCGangClause * OpenACCGangClause::Create(const ASTContext &C, SourceLocation BeginLoc, SourceLocation LParenLoc, @@ -754,6 +782,21 @@ void OpenACCClausePrinter::VisitCopyClause(const OpenACCCopyClause &C) { OS << ")"; } +void OpenACCClausePrinter::VisitLinkClause(const OpenACCLinkClause &C) { + OS << "link("; + llvm::interleaveComma(C.getVarList(), OS, + [&](const Expr *E) { printExpr(E); }); + OS << ")"; +} + +void OpenACCClausePrinter::VisitDeviceResidentClause( + const OpenACCDeviceResidentClause &C) { + OS << "device_resident("; + llvm::interleaveComma(C.getVarList(), OS, + [&](const Expr *E) { printExpr(E); }); + OS << ")"; +} + void OpenACCClausePrinter::VisitCopyInClause(const OpenACCCopyInClause &C) { OS << C.getClauseKind() << '('; if (C.isReadOnly()) @@ -835,6 +878,10 @@ void OpenACCClausePrinter::VisitSeqClause(const OpenACCSeqClause &C) { OS << "seq"; } +void OpenACCClausePrinter::VisitNoHostClause(const OpenACCNoHostClause &C) { + OS << "nohost"; +} + void OpenACCClausePrinter::VisitCollapseClause(const OpenACCCollapseClause &C) { OS << "collapse("; if (C.hasForce()) diff --git a/clang/lib/AST/StmtOpenACC.cpp b/clang/lib/AST/StmtOpenACC.cpp index 11eab0c27579d..8a86074fe68a0 100644 --- a/clang/lib/AST/StmtOpenACC.cpp +++ b/clang/lib/AST/StmtOpenACC.cpp @@ -321,3 +321,21 @@ OpenACCAtomicConstruct *OpenACCAtomicConstruct::Create( OpenACCAtomicConstruct(Start, DirectiveLoc, AtKind, End, AssociatedStmt); return Inst; } +OpenACCCacheConstruct *OpenACCCacheConstruct::CreateEmpty(const ASTContext &C, + unsigned NumVars) { + void *Mem = + C.Allocate(OpenACCCacheConstruct::totalSizeToAlloc(NumVars)); + auto *Inst = new (Mem) OpenACCCacheConstruct(NumVars); + return Inst; +} + +OpenACCCacheConstruct *OpenACCCacheConstruct::Create( + const 
ASTContext &C, SourceLocation Start, SourceLocation DirectiveLoc, + SourceLocation LParenLoc, SourceLocation ReadOnlyLoc, + ArrayRef VarList, SourceLocation RParenLoc, SourceLocation End) { + void *Mem = C.Allocate( + OpenACCCacheConstruct::totalSizeToAlloc(VarList.size())); + auto *Inst = new (Mem) OpenACCCacheConstruct( + Start, DirectiveLoc, LParenLoc, ReadOnlyLoc, VarList, RParenLoc, End); + return Inst; +} diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index c8ea7b52a6241..facdc4104c374 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -17,6 +17,7 @@ #include "clang/AST/DeclBase.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclOpenACC.h" #include "clang/AST/DeclOpenMP.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/Expr.h" @@ -262,7 +263,11 @@ void StmtPrinter::VisitNullStmt(NullStmt *Node) { void StmtPrinter::VisitDeclStmt(DeclStmt *Node) { Indent(); PrintRawDeclStmt(Node); - OS << ";" << NL; + // Certain pragma declarations shouldn't have a semi-colon after them. + if (!Node->isSingleDecl() || + !isa(Node->getSingleDecl())) + OS << ";"; + OS << NL; } void StmtPrinter::VisitCompoundStmt(CompoundStmt *Node) { @@ -1257,6 +1262,18 @@ void StmtPrinter::VisitOpenACCAtomicConstruct(OpenACCAtomicConstruct *S) { PrintStmt(S->getAssociatedStmt()); } +void StmtPrinter::VisitOpenACCCacheConstruct(OpenACCCacheConstruct *S) { + Indent() << "#pragma acc cache("; + if (S->hasReadOnly()) + OS << "readonly: "; + + llvm::interleaveComma(S->getVarList(), OS, [&](const Expr *E) { + E->printPretty(OS, nullptr, Policy); + }); + + OS << ")\n"; +} + //===----------------------------------------------------------------------===// // Expr printing methods. 
//===----------------------------------------------------------------------===// diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 2603df25ba2a4..574f67f4274e7 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -2550,6 +2550,16 @@ void OpenACCClauseProfiler::VisitIfClause(const OpenACCIfClause &Clause) { void OpenACCClauseProfiler::VisitCopyClause(const OpenACCCopyClause &Clause) { VisitClauseWithVarList(Clause); } + +void OpenACCClauseProfiler::VisitLinkClause(const OpenACCLinkClause &Clause) { + VisitClauseWithVarList(Clause); +} + +void OpenACCClauseProfiler::VisitDeviceResidentClause( + const OpenACCDeviceResidentClause &Clause) { + VisitClauseWithVarList(Clause); +} + void OpenACCClauseProfiler::VisitCopyInClause( const OpenACCCopyInClause &Clause) { VisitClauseWithVarList(Clause); @@ -2698,6 +2708,7 @@ void OpenACCClauseProfiler::VisitWaitClause(const OpenACCWaitClause &Clause) { for (auto *E : Clause.getQueueIdExprs()) Profiler.VisitStmt(E); } + /// Nothing to do here, there are no sub-statements. void OpenACCClauseProfiler::VisitDeviceTypeClause( const OpenACCDeviceTypeClause &Clause) {} @@ -2708,6 +2719,8 @@ void OpenACCClauseProfiler::VisitIndependentClause( const OpenACCIndependentClause &Clause) {} void OpenACCClauseProfiler::VisitSeqClause(const OpenACCSeqClause &Clause) {} +void OpenACCClauseProfiler::VisitNoHostClause( + const OpenACCNoHostClause &Clause) {} void OpenACCClauseProfiler::VisitGangClause(const OpenACCGangClause &Clause) { for (unsigned I = 0; I < Clause.getNumExprs(); ++I) { @@ -2786,6 +2799,11 @@ void StmtProfiler::VisitOpenACCWaitConstruct(const OpenACCWaitConstruct *S) { P.VisitOpenACCClauseList(S->clauses()); } +void StmtProfiler::VisitOpenACCCacheConstruct(const OpenACCCacheConstruct *S) { + // VisitStmt covers 'children', so the exprs inside of it are covered. 
+ VisitStmt(S); +} + void StmtProfiler::VisitOpenACCInitConstruct(const OpenACCInitConstruct *S) { VisitStmt(S); OpenACCClauseProfiler P{*this}; diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index fd1eaab9621dd..91f3f14c6b454 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -417,10 +417,13 @@ void TextNodeDumper::Visit(const OpenACCClause *C) { case OpenACCClauseKind::Device: case OpenACCClauseKind::DeviceNum: case OpenACCClauseKind::DefaultAsync: + case OpenACCClauseKind::DeviceResident: case OpenACCClauseKind::DevicePtr: case OpenACCClauseKind::Finalize: case OpenACCClauseKind::FirstPrivate: + case OpenACCClauseKind::Link: case OpenACCClauseKind::NoCreate: + case OpenACCClauseKind::NoHost: case OpenACCClauseKind::NumGangs: case OpenACCClauseKind::NumWorkers: case OpenACCClauseKind::Present: @@ -3040,6 +3043,12 @@ void TextNodeDumper::VisitOpenACCHostDataConstruct( void TextNodeDumper::VisitOpenACCWaitConstruct(const OpenACCWaitConstruct *S) { VisitOpenACCConstructStmt(S); } +void TextNodeDumper::VisitOpenACCCacheConstruct( + const OpenACCCacheConstruct *S) { + VisitOpenACCConstructStmt(S); + if (S->hasReadOnly()) + OS <<" readonly"; +} void TextNodeDumper::VisitOpenACCInitConstruct(const OpenACCInitConstruct *S) { VisitOpenACCConstructStmt(S); } @@ -3061,6 +3070,34 @@ void TextNodeDumper::VisitOpenACCAtomicConstruct( OS << ' ' << S->getAtomicKind(); } +void TextNodeDumper::VisitOpenACCDeclareDecl(const OpenACCDeclareDecl *D) { + OS << " " << D->getDirectiveKind(); + + for (const OpenACCClause *C : D->clauses()) + AddChild([=] { + Visit(C); + for (const Stmt *S : C->children()) + AddChild([=] { Visit(S); }); + }); +} +void TextNodeDumper::VisitOpenACCRoutineDecl(const OpenACCRoutineDecl *D) { + OS << " " << D->getDirectiveKind(); + + if (D->hasNameSpecified()) { + OS << " name_specified"; + dumpSourceRange(SourceRange{D->getLParenLoc(), D->getRParenLoc()}); + } + + AddChild([=] { 
Visit(D->getFunctionReference()); }); + + for (const OpenACCClause *C : D->clauses()) + AddChild([=] { + Visit(C); + for (const Stmt *S : C->children()) + AddChild([=] { Visit(S); }); + }); +} + void TextNodeDumper::VisitEmbedExpr(const EmbedExpr *S) { AddChild("begin", [=] { OS << S->getStartingElementPos(); }); AddChild("number of elements", [=] { OS << S->getDataElementCount(); }); diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 8c11ec2e1fe24..2fd7f5800594a 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -3559,6 +3559,21 @@ StringRef FunctionType::getNameForCallConv(CallingConv CC) { case CC_PreserveNone: return "preserve_none"; // clang-format off case CC_RISCVVectorCall: return "riscv_vector_cc"; +#define CC_VLS_CASE(ABI_VLEN) \ + case CC_RISCVVLSCall_##ABI_VLEN: return "riscv_vls_cc(" #ABI_VLEN ")"; + CC_VLS_CASE(32) + CC_VLS_CASE(64) + CC_VLS_CASE(128) + CC_VLS_CASE(256) + CC_VLS_CASE(512) + CC_VLS_CASE(1024) + CC_VLS_CASE(2048) + CC_VLS_CASE(4096) + CC_VLS_CASE(8192) + CC_VLS_CASE(16384) + CC_VLS_CASE(32768) + CC_VLS_CASE(65536) +#undef CC_VLS_CASE // clang-format on } @@ -4226,6 +4241,7 @@ bool AttributedType::isCallingConv() const { case attr::M68kRTD: case attr::PreserveNone: case attr::RISCVVectorCC: + case attr::RISCVVLSCC: return true; } llvm_unreachable("invalid attr kind"); @@ -5098,6 +5114,10 @@ bool Type::hasSizedVLAType() const { return false; } +bool Type::isHLSLResourceRecord() const { + return HLSLAttributedResourceType::findHandleTypeOnResource(this) != nullptr; +} + bool Type::isHLSLIntangibleType() const { const Type *Ty = getUnqualifiedDesugaredType(); diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 31695374cb52b..8762cc7b1e4e1 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -1136,6 +1136,23 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info, case CC_RISCVVectorCall: OS << "__attribute__((riscv_vector_cc))"; 
break; +#define CC_VLS_CASE(ABI_VLEN) \ + case CC_RISCVVLSCall_##ABI_VLEN: \ + OS << "__attribute__((riscv_vls_cc" #ABI_VLEN "))"; \ + break; + CC_VLS_CASE(32) + CC_VLS_CASE(64) + CC_VLS_CASE(128) + CC_VLS_CASE(256) + CC_VLS_CASE(512) + CC_VLS_CASE(1024) + CC_VLS_CASE(2048) + CC_VLS_CASE(4096) + CC_VLS_CASE(8192) + CC_VLS_CASE(16384) + CC_VLS_CASE(32768) + CC_VLS_CASE(65536) +#undef CC_VLS_CASE } } @@ -2064,6 +2081,9 @@ void TypePrinter::printAttributedAfter(const AttributedType *T, case attr::RISCVVectorCC: OS << "riscv_vector_cc"; break; + case attr::RISCVVLSCC: + OS << "riscv_vls_cc"; + break; case attr::NoDeref: OS << "noderef"; break; diff --git a/clang/lib/AST/VTableBuilder.cpp b/clang/lib/AST/VTableBuilder.cpp index 18893b996b5d6..b7c6ad85b8889 100644 --- a/clang/lib/AST/VTableBuilder.cpp +++ b/clang/lib/AST/VTableBuilder.cpp @@ -1735,8 +1735,8 @@ void ItaniumVTableBuilder::LayoutPrimaryAndSecondaryVTables( const CXXMethodDecl *MD = I.first; const MethodInfo &MI = I.second; if (const CXXDestructorDecl *DD = dyn_cast(MD)) { - MethodVTableIndices[GlobalDecl(DD, Dtor_Complete)] - = MI.VTableIndex - AddressPoint; + MethodVTableIndices[GlobalDecl(DD, Dtor_Complete)] = + MI.VTableIndex - AddressPoint; MethodVTableIndices[GlobalDecl(DD, Dtor_Deleting)] = MI.VTableIndex + 1 - AddressPoint; } else { @@ -2657,7 +2657,11 @@ class VFTableBuilder { MethodVFTableLocation Loc(MI.VBTableIndex, WhichVFPtr.getVBaseWithVPtr(), WhichVFPtr.NonVirtualOffset, MI.VFTableIndex); if (const CXXDestructorDecl *DD = dyn_cast(MD)) { - MethodVFTableLocations[GlobalDecl(DD, Dtor_Deleting)] = Loc; + // In Microsoft ABI vftable always references vector deleting dtor. + CXXDtorType DtorTy = Context.getTargetInfo().getCXXABI().isMicrosoft() + ? 
Dtor_VectorDeleting + : Dtor_Deleting; + MethodVFTableLocations[GlobalDecl(DD, DtorTy)] = Loc; } else { MethodVFTableLocations[MD] = Loc; } @@ -3287,7 +3291,10 @@ void VFTableBuilder::dumpLayout(raw_ostream &Out) { const CXXDestructorDecl *DD = Component.getDestructorDecl(); DD->printQualifiedName(Out); - Out << "() [scalar deleting]"; + if (Context.getTargetInfo().getCXXABI().isMicrosoft()) + Out << "() [vector deleting]"; + else + Out << "() [scalar deleting]"; if (DD->isPureVirtual()) Out << " [pure]"; @@ -3758,7 +3765,7 @@ void MicrosoftVTableContext::dumpMethodLocations( PredefinedIdentKind::PrettyFunctionNoVirtual, MD); if (isa(MD)) { - IndicesMap[I.second] = MethodName + " [scalar deleting]"; + IndicesMap[I.second] = MethodName + " [vector deleting]"; } else { IndicesMap[I.second] = MethodName; } @@ -3875,7 +3882,7 @@ MicrosoftVTableContext::getMethodVFTableLocation(GlobalDecl GD) { assert(hasVtableSlot(cast(GD.getDecl())) && "Only use this method for virtual methods or dtors"); if (isa(GD.getDecl())) - assert(GD.getDtorType() == Dtor_Deleting); + assert(GD.getDtorType() == Dtor_VectorDeleting); GD = GD.getCanonicalDecl(); diff --git a/clang/lib/ASTMatchers/ASTMatchFinder.cpp b/clang/lib/ASTMatchers/ASTMatchFinder.cpp index 3d01a70395a9b..e9ec7eff1e0ab 100644 --- a/clang/lib/ASTMatchers/ASTMatchFinder.cpp +++ b/clang/lib/ASTMatchers/ASTMatchFinder.cpp @@ -1287,6 +1287,27 @@ class MatchASTVisitor : public RecursiveASTVisitor, auto Aliases = TypeAliases.find(CanonicalType); if (Aliases == TypeAliases.end()) return false; + + if (const auto *ElaboratedTypeNode = + llvm::dyn_cast(TypeNode)) { + if (ElaboratedTypeNode->isSugared() && Aliases->second.size() > 1) { + const auto &DesugaredTypeName = + ElaboratedTypeNode->desugar().getAsString(); + + for (const TypedefNameDecl *Alias : Aliases->second) { + if (Alias->getName() != DesugaredTypeName) { + continue; + } + + BoundNodesTreeBuilder Result(*Builder); + if (Matcher.matches(*Alias, this, &Result)) { + *Builder 
= std::move(Result); + return true; + } + } + } + } + for (const TypedefNameDecl *Alias : Aliases->second) { BoundNodesTreeBuilder Result(*Builder); if (Matcher.matches(*Alias, this, &Result)) { diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp index 3e144395cffc6..9af1e915482da 100644 --- a/clang/lib/Analysis/CFG.cpp +++ b/clang/lib/Analysis/CFG.cpp @@ -433,7 +433,7 @@ class reverse_children { ArrayRef children; public: - reverse_children(Stmt *S); + reverse_children(Stmt *S, ASTContext &Ctx); using iterator = ArrayRef::reverse_iterator; @@ -443,21 +443,44 @@ class reverse_children { } // namespace -reverse_children::reverse_children(Stmt *S) { +reverse_children::reverse_children(Stmt *S, ASTContext &Ctx) { if (CallExpr *CE = dyn_cast(S)) { children = CE->getRawSubExprs(); return; } + switch (S->getStmtClass()) { - // Note: Fill in this switch with more cases we want to optimize. - case Stmt::InitListExprClass: { - InitListExpr *IE = cast(S); - children = llvm::ArrayRef(reinterpret_cast(IE->getInits()), - IE->getNumInits()); - return; + // Note: Fill in this switch with more cases we want to optimize. + case Stmt::InitListExprClass: { + InitListExpr *IE = cast(S); + children = llvm::ArrayRef(reinterpret_cast(IE->getInits()), + IE->getNumInits()); + return; + } + + case Stmt::AttributedStmtClass: { + // For an attributed stmt, the "children()" returns only the NullStmt + // (;) but semantically the "children" are supposed to be the + // expressions _within_ i.e. the two square brackets i.e. [[ HERE ]] + // so we add the subexpressions first, _then_ add the "children" + auto *AS = cast(S); + for (const auto *Attr : AS->getAttrs()) { + if (const auto *AssumeAttr = dyn_cast(Attr)) { + Expr *AssumeExpr = AssumeAttr->getAssumption(); + if (!AssumeExpr->HasSideEffects(Ctx)) { + childrenBuf.push_back(AssumeExpr); + } + } } - default: - break; + + // Visit the actual children AST nodes. + // For CXXAssumeAttrs, this is always a NullStmt. 
+ llvm::append_range(childrenBuf, AS->children()); + children = childrenBuf; + return; + } + default: + break; } // Default case for all other statements. @@ -2433,7 +2456,7 @@ CFGBlock *CFGBuilder::VisitChildren(Stmt *S) { // Visit the children in their reverse order so that they appear in // left-to-right (natural) order in the CFG. - reverse_children RChildren(S); + reverse_children RChildren(S, *Context); for (Stmt *Child : RChildren) { if (Child) if (CFGBlock *R = Visit(Child)) @@ -2449,7 +2472,7 @@ CFGBlock *CFGBuilder::VisitInitListExpr(InitListExpr *ILE, AddStmtChoice asc) { } CFGBlock *B = Block; - reverse_children RChildren(ILE); + reverse_children RChildren(ILE, *Context); for (Stmt *Child : RChildren) { if (!Child) continue; @@ -2484,6 +2507,14 @@ static bool isFallthroughStatement(const AttributedStmt *A) { return isFallthrough; } +static bool isCXXAssumeAttr(const AttributedStmt *A) { + bool hasAssumeAttr = hasSpecificAttr(A->getAttrs()); + + assert((!hasAssumeAttr || isa(A->getSubStmt())) && + "expected [[assume]] not to have children"); + return hasAssumeAttr; +} + CFGBlock *CFGBuilder::VisitAttributedStmt(AttributedStmt *A, AddStmtChoice asc) { // AttributedStmts for [[likely]] can have arbitrary statements as children, @@ -2494,7 +2525,8 @@ CFGBlock *CFGBuilder::VisitAttributedStmt(AttributedStmt *A, // So only add the AttributedStmt for FallThrough, which has CFG effects and // also no children, and omit the others. None of the other current StmtAttrs // have semantic meaning for the CFG. 
- if (isFallthroughStatement(A) && asc.alwaysAdd(*this, A)) { + bool isInterestingAttribute = isFallthroughStatement(A) || isCXXAssumeAttr(A); + if (isInterestingAttribute && asc.alwaysAdd(*this, A)) { autoCreateBlock(); appendStmt(Block, A); } @@ -2700,6 +2732,16 @@ static bool CanThrow(Expr *E, ASTContext &Ctx) { return true; } +static bool isBuiltinAssumeWithSideEffects(const ASTContext &Ctx, + const CallExpr *CE) { + unsigned BuiltinID = CE->getBuiltinCallee(); + if (BuiltinID != Builtin::BI__assume && + BuiltinID != Builtin::BI__builtin_assume) + return false; + + return CE->getArg(0)->HasSideEffects(Ctx); +} + CFGBlock *CFGBuilder::VisitCallExpr(CallExpr *C, AddStmtChoice asc) { // Compute the callee type. QualType calleeType = C->getCallee()->getType(); @@ -2738,7 +2780,8 @@ CFGBlock *CFGBuilder::VisitCallExpr(CallExpr *C, AddStmtChoice asc) { NoReturn = true; if (FD->hasAttr()) AddEHEdge = false; - if (FD->getBuiltinID() == Builtin::BI__builtin_object_size || + if (isBuiltinAssumeWithSideEffects(FD->getASTContext(), C) || + FD->getBuiltinID() == Builtin::BI__builtin_object_size || FD->getBuiltinID() == Builtin::BI__builtin_dynamic_object_size) OmitArguments = true; } diff --git a/clang/lib/Analysis/ExprMutationAnalyzer.cpp b/clang/lib/Analysis/ExprMutationAnalyzer.cpp index 8944343484e58..823d7543f085f 100644 --- a/clang/lib/Analysis/ExprMutationAnalyzer.cpp +++ b/clang/lib/Analysis/ExprMutationAnalyzer.cpp @@ -80,6 +80,17 @@ static bool canExprResolveTo(const Expr *Source, const Expr *Target) { namespace { +// `ArraySubscriptExpr` can switch base and idx, e.g. `a[4]` is the same as +// `4[a]`. When type is dependent, we conservatively assume both sides are base. 
+AST_MATCHER_P(ArraySubscriptExpr, hasBaseConservative, + ast_matchers::internal::Matcher, InnerMatcher) { + if (Node.isTypeDependent()) { + return InnerMatcher.matches(*Node.getLHS(), Finder, Builder) || + InnerMatcher.matches(*Node.getRHS(), Finder, Builder); + } + return InnerMatcher.matches(*Node.getBase(), Finder, Builder); +} + AST_MATCHER(Type, isDependentType) { return Node.isDependentType(); } AST_MATCHER_P(LambdaExpr, hasCaptureInit, const Expr *, E) { @@ -513,8 +524,8 @@ ExprMutationAnalyzer::Analyzer::findArrayElementMutation(const Expr *Exp) { // Check whether any element of an array is mutated. const auto SubscriptExprs = match( findAll(arraySubscriptExpr( - anyOf(hasBase(canResolveToExpr(Exp)), - hasBase(implicitCastExpr(allOf( + anyOf(hasBaseConservative(canResolveToExpr(Exp)), + hasBaseConservative(implicitCastExpr(allOf( hasCastKind(CK_ArrayToPointerDecay), hasSourceExpression(canResolveToExpr(Exp))))))) .bind(NodeID::value)), @@ -716,7 +727,8 @@ ExprMutationAnalyzer::Analyzer::findPointeeValueMutation(const Expr *Exp) { unaryOperator(hasOperatorName("*"), hasUnaryOperand(canResolveToExprPointee(Exp))), // deref by [] - arraySubscriptExpr(hasBase(canResolveToExprPointee(Exp))))) + arraySubscriptExpr( + hasBaseConservative(canResolveToExprPointee(Exp))))) .bind(NodeID::value))), Stm, Context); return findExprMutation(Matches); diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp index e1394e28cd49a..9381c5c42e566 100644 --- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp +++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp @@ -580,6 +580,22 @@ void handleConstMemberCall(const CallExpr *CE, return; } + // Cache if the const method returns a reference + if (RecordLoc != nullptr && CE->isGLValue()) { + const FunctionDecl *DirectCallee = CE->getDirectCallee(); + if (DirectCallee == nullptr) + 
return; + + StorageLocation &Loc = + State.Lattice.getOrCreateConstMethodReturnStorageLocation( + *RecordLoc, DirectCallee, State.Env, [&](StorageLocation &Loc) { + // no-op + }); + + State.Env.setStorageLocation(*CE, Loc); + return; + } + // Cache if the const method returns a boolean or pointer type. // We may decide to cache other return types in the future. if (RecordLoc != nullptr && diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index ff4f940a596e3..12e99143cb148 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -2364,12 +2364,13 @@ template static std::optional getEndCharLoc(const NodeTy *Node, const SourceManager &SM, const LangOptions &LangOpts) { - unsigned TkLen = Lexer::MeasureTokenLength(Node->getEndLoc(), SM, LangOpts); - SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1); - - if (Loc.isValid()) - return Loc; + if (unsigned TkLen = + Lexer::MeasureTokenLength(Node->getEndLoc(), SM, LangOpts)) { + SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1); + if (Loc.isValid()) + return Loc; + } return std::nullopt; } diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index 281aebdb1c35d..c6d228fe98100 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -749,8 +749,14 @@ std::unique_ptr AllocateTarget(const llvm::Triple &Triple, case llvm::Triple::loongarch64: switch (os) { case llvm::Triple::Linux: - return std::make_unique>(Triple, - Opts); + switch (Triple.getEnvironment()) { + default: + return std::make_unique>(Triple, + Opts); + case llvm::Triple::OpenHOS: + return std::make_unique>(Triple, + Opts); + } case llvm::Triple::FreeBSD: return std::make_unique>(Triple, Opts); diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp index a463de0884020..4b85a3645b17d 100644 --- a/clang/lib/Basic/Targets/BPF.cpp +++ b/clang/lib/Basic/Targets/BPF.cpp @@ -75,6 
+75,7 @@ void BPFTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__BPF_FEATURE_SDIV_SMOD"); Builder.defineMacro("__BPF_FEATURE_GOTOL"); Builder.defineMacro("__BPF_FEATURE_ST"); + Builder.defineMacro("__BPF_FEATURE_LOAD_ACQ_STORE_REL"); } } diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h index 991efd2bde01f..a88c851797aab 100644 --- a/clang/lib/Basic/Targets/OSTargets.h +++ b/clang/lib/Basic/Targets/OSTargets.h @@ -817,6 +817,7 @@ class LLVM_LIBRARY_VISIBILITY UEFITargetInfo : public OSTargetInfo { : OSTargetInfo(Triple, Opts) { this->WCharType = TargetInfo::UnsignedShort; this->WIntType = TargetInfo::UnsignedShort; + this->UseMicrosoftManglingForC = true; } }; @@ -837,6 +838,7 @@ class LLVM_LIBRARY_VISIBILITY WindowsTargetInfo : public OSTargetInfo { : OSTargetInfo(Triple, Opts) { this->WCharType = TargetInfo::UnsignedShort; this->WIntType = TargetInfo::UnsignedShort; + this->UseMicrosoftManglingForC = true; } }; diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp index dff990d15dd62..fad698d985af7 100644 --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -559,6 +559,18 @@ RISCVTargetInfo::checkCallingConvention(CallingConv CC) const { return CCCR_Warning; case CC_C: case CC_RISCVVectorCall: + case CC_RISCVVLSCall_32: + case CC_RISCVVLSCall_64: + case CC_RISCVVLSCall_128: + case CC_RISCVVLSCall_256: + case CC_RISCVVLSCall_512: + case CC_RISCVVLSCall_1024: + case CC_RISCVVLSCall_2048: + case CC_RISCVVLSCall_4096: + case CC_RISCVVLSCall_8192: + case CC_RISCVVLSCall_16384: + case CC_RISCVVLSCall_32768: + case CC_RISCVVLSCall_65536: return CCCR_OK; } } diff --git a/clang/lib/CIR/CodeGen/Address.h b/clang/lib/CIR/CodeGen/Address.h index 72e7e1dcf1560..fba1ffd90877b 100644 --- a/clang/lib/CIR/CodeGen/Address.h +++ b/clang/lib/CIR/CodeGen/Address.h @@ -52,6 +52,14 @@ class Address { elementType); } + Address(mlir::Value pointer, clang::CharUnits 
alignment) + : Address(pointer, + mlir::cast(pointer.getType()).getPointee(), + alignment) { + assert((!alignment.isZero() || pointer == nullptr) && + "creating valid address with invalid alignment"); + } + static Address invalid() { return Address(nullptr); } bool isValid() const { return pointerAndKnownNonNull.getPointer() != nullptr; diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.h b/clang/lib/CIR/CodeGen/CIRGenCall.h new file mode 100644 index 0000000000000..0996167feeef6 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenCall.h @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// These classes wrap the information about a call or function +// definition used to handle ABI compliancy. +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_LIB_CODEGEN_CIRGENCALL_H +#define CLANG_LIB_CODEGEN_CIRGENCALL_H + +#include "clang/AST/GlobalDecl.h" +#include "llvm/ADT/SmallVector.h" + +namespace clang::CIRGen { + +/// Type for representing both the decl and type of parameters to a function. +/// The decl must be either a ParmVarDecl or ImplicitParamDecl. +class FunctionArgList : public llvm::SmallVector {}; + +} // namespace clang::CIRGen + +#endif // CLANG_LIB_CODEGEN_CIRGENCALL_H diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp index e44cad559d509..406026b0b9f27 100644 --- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp @@ -44,7 +44,7 @@ void CIRGenFunction::emitAutoVarAlloca(const VarDecl &d) { mlir::Type allocaTy = convertTypeForMem(ty); // Create the temp alloca and declare variable using it. 
address = createTempAlloca(allocaTy, alignment, loc, d.getName()); - declare(address, &d, ty, getLoc(d.getSourceRange()), alignment); + declare(address.getPointer(), &d, ty, getLoc(d.getSourceRange()), alignment); setAddrOfLocalVar(&d, address); } @@ -62,7 +62,7 @@ void CIRGenFunction::emitAutoVarInit(const clang::VarDecl &d) { void CIRGenFunction::emitAutoVarCleanups(const clang::VarDecl &d) { // Check the type for a cleanup. - if (QualType::DestructionKind dtorKind = d.needsDestruction(getContext())) + if (d.needsDestruction(getContext())) cgm.errorNYI(d.getSourceRange(), "emitAutoVarCleanups: type cleanup"); assert(!cir::MissingFeatures::opAllocaPreciseLifetime()); diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index 90a2fd2a5d806..b9e56dc4123d6 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -74,6 +74,15 @@ class ScalarExprEmitter : public StmtVisitor { builder.getAttr(type, e->getValue())); } + mlir::Value VisitFloatingLiteral(const FloatingLiteral *e) { + mlir::Type type = cgf.convertType(e->getType()); + assert(mlir::isa(type) && + "expect floating-point type"); + return builder.create( + cgf.getLoc(e->getExprLoc()), type, + builder.getAttr(type, e->getValue())); + } + mlir::Value VisitCXXBoolLiteralExpr(const CXXBoolLiteralExpr *e) { mlir::Type type = cgf.convertType(e->getType()); return builder.create( @@ -98,6 +107,7 @@ class ScalarExprEmitter : public StmtVisitor { cgf.getCIRGenModule().errorNYI(loc, "emitScalarConversion for unequal types"); + return {}; } }; diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp index 86986b5847e98..47d296b70d789 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp @@ -12,6 +12,9 @@ #include "CIRGenFunction.h" +#include "CIRGenCall.h" +#include "CIRGenValue.h" +#include "mlir/IR/Location.h" #include 
"clang/AST/GlobalDecl.h" #include "clang/CIR/MissingFeatures.h" @@ -132,24 +135,23 @@ mlir::Location CIRGenFunction::getLoc(mlir::Location lhs, mlir::Location rhs) { return mlir::FusedLoc::get(locs, metadata, &getMLIRContext()); } -mlir::LogicalResult CIRGenFunction::declare(Address addr, const Decl *var, - QualType ty, mlir::Location loc, - CharUnits alignment) { +void CIRGenFunction::declare(mlir::Value addrVal, const Decl *var, QualType ty, + mlir::Location loc, CharUnits alignment, + bool isParam) { const auto *namedVar = dyn_cast_or_null(var); assert(namedVar && "Needs a named decl"); assert(!cir::MissingFeatures::cgfSymbolTable()); - mlir::Value addrVal = addr.getPointer(); auto allocaOp = cast(addrVal.getDefiningOp()); + if (isParam) + allocaOp.setInitAttr(mlir::UnitAttr::get(&getMLIRContext())); if (ty->isReferenceType() || ty.isConstQualified()) allocaOp.setConstantAttr(mlir::UnitAttr::get(&getMLIRContext())); - - return mlir::success(); } void CIRGenFunction::startFunction(GlobalDecl gd, QualType returnType, cir::FuncOp fn, cir::FuncType funcType, - SourceLocation loc, + FunctionArgList args, SourceLocation loc, SourceLocation startLoc) { assert(!curFn && "CIRGenFunction can only be used for one function at a time"); @@ -157,8 +159,41 @@ void CIRGenFunction::startFunction(GlobalDecl gd, QualType returnType, fnRetTy = returnType; curFn = fn; + const auto *fd = dyn_cast_or_null(gd.getDecl()); + mlir::Block *entryBB = &fn.getBlocks().front(); builder.setInsertionPointToStart(entryBB); + + // TODO(cir): this should live in `emitFunctionProlog + // Declare all the function arguments in the symbol table. 
+ for (const auto nameValue : llvm::zip(args, entryBB->getArguments())) { + const VarDecl *paramVar = std::get<0>(nameValue); + mlir::Value paramVal = std::get<1>(nameValue); + CharUnits alignment = getContext().getDeclAlign(paramVar); + mlir::Location paramLoc = getLoc(paramVar->getSourceRange()); + paramVal.setLoc(paramLoc); + + mlir::Value addrVal = + emitAlloca(cast(paramVar)->getName(), + convertType(paramVar->getType()), paramLoc, alignment); + + declare(addrVal, paramVar, paramVar->getType(), paramLoc, alignment, + /*isParam=*/true); + + setAddrOfLocalVar(paramVar, Address(addrVal, alignment)); + + bool isPromoted = isa(paramVar) && + cast(paramVar)->isKNRPromoted(); + assert(!cir::MissingFeatures::constructABIArgDirectExtend()); + if (isPromoted) + cgm.errorNYI(fd->getSourceRange(), "Function argument demotion"); + + // Location of the store to the param storage tracked as beginning of + // the function body. + mlir::Location fnBodyBegin = getLoc(fd->getBody()->getBeginLoc()); + builder.CIRBaseBuilderTy::createStore(fnBodyBegin, paramVal, addrVal); + } + assert(builder.getInsertionBlock() && "Should be valid"); } void CIRGenFunction::finishFunction(SourceLocation endLoc) {} @@ -187,8 +222,10 @@ cir::FuncOp CIRGenFunction::generateCode(clang::GlobalDecl gd, cir::FuncOp fn, // This will be used once more code is upstreamed. 
[[maybe_unused]] mlir::Block *entryBB = fn.addEntryBlock(); - startFunction(gd, funcDecl->getReturnType(), fn, funcType, loc, - bodyRange.getBegin()); + FunctionArgList args; + QualType retTy = buildFunctionArgList(gd, args); + + startFunction(gd, retTy, fn, funcType, args, loc, bodyRange.getBegin()); if (isa(funcDecl)) getCIRGenModule().errorNYI(bodyRange, "C++ destructor definition"); @@ -234,6 +271,29 @@ cir::FuncOp CIRGenFunction::generateCode(clang::GlobalDecl gd, cir::FuncOp fn, return fn; } +clang::QualType CIRGenFunction::buildFunctionArgList(clang::GlobalDecl gd, + FunctionArgList &args) { + const auto *fd = cast(gd.getDecl()); + QualType retTy = fd->getReturnType(); + + const auto *md = dyn_cast(fd); + if (md && md->isInstance()) + cgm.errorNYI(fd->getSourceRange(), "buildFunctionArgList: CXXMethodDecl"); + + if (isa(fd)) + cgm.errorNYI(fd->getSourceRange(), + "buildFunctionArgList: CXXConstructorDecl"); + + for (auto *param : fd->parameters()) + args.push_back(param); + + if (md && (isa(md) || isa(md))) + cgm.errorNYI(fd->getSourceRange(), + "buildFunctionArgList: implicit structor params"); + + return retTy; +} + /// Emit code to compute a designator that specifies the location /// of the expression. /// FIXME: document this function better. 
@@ -244,7 +304,7 @@ LValue CIRGenFunction::emitLValue(const Expr *e) { getCIRGenModule().errorNYI(e->getSourceRange(), std::string("l-value not implemented for '") + e->getStmtClassName() + "'"); - break; + return LValue(); case Expr::DeclRefExprClass: return emitDeclRefLValue(cast(e)); } diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index e0888acdc3dce..cf896d3c0a946 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -14,6 +14,7 @@ #define CLANG_LIB_CIR_CODEGEN_CIRGENFUNCTION_H #include "CIRGenBuilder.h" +#include "CIRGenCall.h" #include "CIRGenModule.h" #include "CIRGenTypeCache.h" #include "CIRGenValue.h" @@ -96,9 +97,9 @@ class CIRGenFunction : public CIRGenTypeCache { private: /// Declare a variable in the current scope, return success if the variable /// wasn't declared yet. - mlir::LogicalResult declare(Address addr, const clang::Decl *var, - clang::QualType ty, mlir::Location loc, - clang::CharUnits alignment); + void declare(mlir::Value addrVal, const clang::Decl *var, clang::QualType ty, + mlir::Location loc, clang::CharUnits alignment, + bool isParam = false); public: mlir::Value emitAlloca(llvm::StringRef name, mlir::Type ty, @@ -196,12 +197,16 @@ class CIRGenFunction : public CIRGenTypeCache { cir::FuncOp generateCode(clang::GlobalDecl gd, cir::FuncOp fn, cir::FuncType funcType); + clang::QualType buildFunctionArgList(clang::GlobalDecl gd, + FunctionArgList &args); + /// Emit code for the start of a function. /// \param loc The location to be associated with the function. /// \param startLoc The location of the function body. 
void startFunction(clang::GlobalDecl gd, clang::QualType retTy, cir::FuncOp fn, cir::FuncType funcType, - clang::SourceLocation loc, clang::SourceLocation startLoc); + FunctionArgList args, clang::SourceLocation loc, + clang::SourceLocation startLoc); Address createTempAlloca(mlir::Type ty, CharUnits align, mlir::Location loc, const Twine &name = "tmp"); diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index d8acc99e550ad..0e3e15ca2cadc 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -18,6 +18,7 @@ #include "clang/AST/GlobalDecl.h" #include "clang/Basic/SourceManager.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/MissingFeatures.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Location.h" @@ -31,7 +32,7 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &mlirContext, const clang::CodeGenOptions &cgo, DiagnosticsEngine &diags) : builder(mlirContext, *this), astContext(astContext), - langOpts(astContext.getLangOpts()), + langOpts(astContext.getLangOpts()), codeGenOpts(cgo), theModule{mlir::ModuleOp::create(mlir::UnknownLoc::get(&mlirContext))}, diags(diags), target(astContext.getTargetInfo()), genTypes(*this) { @@ -176,6 +177,17 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd, } varOp.setInitialValueAttr(initializer); } + + // Set CIR's linkage type as appropriate. + cir::GlobalLinkageKind linkage = + getCIRLinkageVarDefinition(vd, /*IsConstant=*/false); + + // Set CIR linkage and DLL storage class. 
+ varOp.setLinkage(linkage); + + if (linkage == cir::GlobalLinkageKind::CommonLinkage) + errorNYI(initExpr->getSourceRange(), "common linkage"); + theModule.push_back(varOp); } else { errorNYI(vd->getSourceRange().getBegin(), @@ -210,6 +222,193 @@ void CIRGenModule::emitGlobalDefinition(clang::GlobalDecl gd, llvm_unreachable("Invalid argument to CIRGenModule::emitGlobalDefinition"); } +static bool shouldBeInCOMDAT(CIRGenModule &cgm, const Decl &d) { + assert(!cir::MissingFeatures::supportComdat()); + + if (d.hasAttr()) + return true; + + GVALinkage linkage; + if (auto *vd = dyn_cast(&d)) + linkage = cgm.getASTContext().GetGVALinkageForVariable(vd); + else + linkage = + cgm.getASTContext().GetGVALinkageForFunction(cast(&d)); + + switch (linkage) { + case clang::GVA_Internal: + case clang::GVA_AvailableExternally: + case clang::GVA_StrongExternal: + return false; + case clang::GVA_DiscardableODR: + case clang::GVA_StrongODR: + return true; + } + llvm_unreachable("No such linkage"); +} + +// TODO(CIR): this could be a common method between LLVM codegen. +static bool isVarDeclStrongDefinition(const ASTContext &astContext, + CIRGenModule &cgm, const VarDecl *vd, + bool noCommon) { + // Don't give variables common linkage if -fno-common was specified unless it + // was overridden by a NoCommon attribute. + if ((noCommon || vd->hasAttr()) && !vd->hasAttr()) + return true; + + // C11 6.9.2/2: + // A declaration of an identifier for an object that has file scope without + // an initializer, and without a storage-class specifier or with the + // storage-class specifier static, constitutes a tentative definition. + if (vd->getInit() || vd->hasExternalStorage()) + return true; + + // A variable cannot be both common and exist in a section. + if (vd->hasAttr()) + return true; + + // A variable cannot be both common and exist in a section. + // We don't try to determine which is the right section in the front-end. 
+ // If no specialized section name is applicable, it will resort to default. + if (vd->hasAttr() || + vd->hasAttr() || + vd->hasAttr() || + vd->hasAttr()) + return true; + + // Thread local vars aren't considered common linkage. + if (vd->getTLSKind()) + return true; + + // Tentative definitions marked with WeakImportAttr are true definitions. + if (vd->hasAttr()) + return true; + + // A variable cannot be both common and exist in a comdat. + if (shouldBeInCOMDAT(cgm, *vd)) + return true; + + // Declarations with a required alignment do not have common linkage in MSVC + // mode. + if (astContext.getTargetInfo().getCXXABI().isMicrosoft()) { + if (vd->hasAttr()) + return true; + QualType varType = vd->getType(); + if (astContext.isAlignmentRequired(varType)) + return true; + + if (const auto *rt = varType->getAs()) { + const RecordDecl *rd = rt->getDecl(); + for (const FieldDecl *fd : rd->fields()) { + if (fd->isBitField()) + continue; + if (fd->hasAttr()) + return true; + if (astContext.isAlignmentRequired(fd->getType())) + return true; + } + } + } + + // Microsoft's link.exe doesn't support alignments greater than 32 bytes for + // common symbols, so symbols with greater alignment requirements cannot be + // common. + // Other COFF linkers (ld.bfd and LLD) support arbitrary power-of-two + // alignments for common symbols via the aligncomm directive, so this + // restriction only applies to MSVC environments. 
+ if (astContext.getTargetInfo().getTriple().isKnownWindowsMSVCEnvironment() && + astContext.getTypeAlignIfKnown(vd->getType()) > + astContext.toBits(CharUnits::fromQuantity(32))) + return true; + + return false; +} + +cir::GlobalLinkageKind CIRGenModule::getCIRLinkageForDeclarator( + const DeclaratorDecl *dd, GVALinkage linkage, bool isConstantVariable) { + if (linkage == GVA_Internal) + return cir::GlobalLinkageKind::InternalLinkage; + + if (dd->hasAttr()) { + if (isConstantVariable) + return cir::GlobalLinkageKind::WeakODRLinkage; + return cir::GlobalLinkageKind::WeakAnyLinkage; + } + + if (const auto *fd = dd->getAsFunction()) + if (fd->isMultiVersion() && linkage == GVA_AvailableExternally) + return cir::GlobalLinkageKind::LinkOnceAnyLinkage; + + // We are guaranteed to have a strong definition somewhere else, + // so we can use available_externally linkage. + if (linkage == GVA_AvailableExternally) + return cir::GlobalLinkageKind::AvailableExternallyLinkage; + + // Note that Apple's kernel linker doesn't support symbol + // coalescing, so we need to avoid linkonce and weak linkages there. + // Normally, this means we just map to internal, but for explicit + // instantiations we'll map to external. + + // In C++, the compiler has to emit a definition in every translation unit + // that references the function. We should use linkonce_odr because + // a) if all references in this translation unit are optimized away, we + // don't need to codegen it. b) if the function persists, it needs to be + // merged with other definitions. c) C++ has the ODR, so we know the + // definition is dependable. + if (linkage == GVA_DiscardableODR) + return !astContext.getLangOpts().AppleKext + ? cir::GlobalLinkageKind::LinkOnceODRLinkage + : cir::GlobalLinkageKind::InternalLinkage; + + // An explicit instantiation of a template has weak linkage, since + // explicit instantiations can occur in multiple translation units + // and must all be equivalent. 
However, we are not allowed to + // throw away these explicit instantiations. + // + // CUDA/HIP: For -fno-gpu-rdc case, device code is limited to one TU, + // so say that CUDA templates are either external (for kernels) or internal. + // This lets llvm perform aggressive inter-procedural optimizations. For + // -fgpu-rdc case, device function calls across multiple TU's are allowed, + // therefore we need to follow the normal linkage paradigm. + if (linkage == GVA_StrongODR) { + if (getLangOpts().AppleKext) + return cir::GlobalLinkageKind::ExternalLinkage; + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + !getLangOpts().GPURelocatableDeviceCode) + return dd->hasAttr() + ? cir::GlobalLinkageKind::ExternalLinkage + : cir::GlobalLinkageKind::InternalLinkage; + return cir::GlobalLinkageKind::WeakODRLinkage; + } + + // C++ doesn't have tentative definitions and thus cannot have common + // linkage. + if (!getLangOpts().CPlusPlus && isa(dd) && + !isVarDeclStrongDefinition(astContext, *this, cast(dd), + getCodeGenOpts().NoCommon)) { + errorNYI(dd->getBeginLoc(), "common linkage", dd->getDeclKindName()); + return cir::GlobalLinkageKind::CommonLinkage; + } + + // selectany symbols are externally visible, so use weak instead of + // linkonce. MSVC optimizes away references to const selectany globals, so + // all definitions should be the same and ODR linkage should be used. + // http://msdn.microsoft.com/en-us/library/5tkz6s71.aspx + if (dd->hasAttr()) + return cir::GlobalLinkageKind::WeakODRLinkage; + + // Otherwise, we have strong external linkage. 
+ assert(linkage == GVA_StrongExternal); + return cir::GlobalLinkageKind::ExternalLinkage; +} + +cir::GlobalLinkageKind +CIRGenModule::getCIRLinkageVarDefinition(const VarDecl *vd, bool isConstant) { + assert(!isConstant && "constant variables NYI"); + GVALinkage linkage = astContext.GetGVALinkageForVariable(vd); + return getCIRLinkageForDeclarator(vd, linkage, isConstant); +} + // Emit code for a single top level declaration. void CIRGenModule::emitTopLevelDecl(Decl *decl) { diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index 71a37b8c9a2ea..5fb5ef505a8c1 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -23,8 +23,10 @@ #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/MLIRContext.h" +#include "clang/AST/Decl.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" #include "llvm/ADT/StringRef.h" #include "llvm/TargetParser/Triple.h" @@ -62,6 +64,8 @@ class CIRGenModule : public CIRGenTypeCache { const clang::LangOptions &langOpts; + const clang::CodeGenOptions &codeGenOpts; + /// A "module" matches a c/cpp source file: containing a list of functions. 
mlir::ModuleOp theModule; @@ -75,6 +79,7 @@ class CIRGenModule : public CIRGenTypeCache { mlir::ModuleOp getModule() const { return theModule; } CIRGenBuilderTy &getBuilder() { return builder; } clang::ASTContext &getASTContext() const { return astContext; } + const clang::CodeGenOptions &getCodeGenOpts() const { return codeGenOpts; } CIRGenTypes &getTypes() { return genTypes; } const clang::LangOptions &getLangOpts() const { return langOpts; } mlir::MLIRContext &getMLIRContext() { return *builder.getContext(); } @@ -123,6 +128,13 @@ class CIRGenModule : public CIRGenTypeCache { const llvm::Triple &getTriple() const { return target.getTriple(); } + cir::GlobalLinkageKind getCIRLinkageForDeclarator(const DeclaratorDecl *dd, + GVALinkage linkage, + bool isConstantVariable); + + cir::GlobalLinkageKind getCIRLinkageVarDefinition(const VarDecl *vd, + bool isConstant); + /// Helpers to emit "not yet implemented" error diagnostics DiagnosticBuilder errorNYI(SourceLocation, llvm::StringRef); diff --git a/clang/lib/CIR/CodeGen/CMakeLists.txt b/clang/lib/CIR/CodeGen/CMakeLists.txt index dbb6d9e7b3807..e6d3cbabd853b 100644 --- a/clang/lib/CIR/CodeGen/CMakeLists.txt +++ b/clang/lib/CIR/CodeGen/CMakeLists.txt @@ -18,6 +18,7 @@ add_clang_library(clangCIR DEPENDS MLIRCIR + MLIRCIROpInterfacesIncGen ${dialect_libs} LINK_LIBS diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index aa21edcb5e99d..5ad369b40cda1 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -18,6 +18,7 @@ #include "mlir/Support/LogicalResult.h" #include "clang/CIR/Dialect/IR/CIROpsDialect.cpp.inc" +#include "clang/CIR/Dialect/IR/CIROpsEnums.cpp.inc" using namespace mlir; using namespace cir; @@ -297,11 +298,16 @@ mlir::LogicalResult cir::GlobalOp::verify() { } void cir::GlobalOp::build(OpBuilder &odsBuilder, OperationState &odsState, - llvm::StringRef sym_name, mlir::Type sym_type) { + llvm::StringRef sym_name, 
mlir::Type sym_type, + cir::GlobalLinkageKind linkage) { odsState.addAttribute(getSymNameAttrName(odsState.name), odsBuilder.getStringAttr(sym_name)); odsState.addAttribute(getSymTypeAttrName(odsState.name), mlir::TypeAttr::get(sym_type)); + + cir::GlobalLinkageKindAttr linkageAttr = + cir::GlobalLinkageKindAttr::get(odsBuilder.getContext(), linkage); + odsState.addAttribute(getLinkageAttrName(odsState.name), linkageAttr); } static void printGlobalOpTypeAndInitialValue(OpAsmPrinter &p, cir::GlobalOp op, diff --git a/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp index af6b5e4fbd9f6..5e44837979af3 100644 --- a/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp @@ -75,3 +75,36 @@ DeletionKind cir::LoadOp::removeBlockingUses( getResult().replaceAllUsesWith(reachingDefinition); return DeletionKind::Delete; } + +//===----------------------------------------------------------------------===// +// Interfaces for StoreOp +//===----------------------------------------------------------------------===// + +bool cir::StoreOp::loadsFrom(const MemorySlot &slot) { return false; } + +bool cir::StoreOp::storesTo(const MemorySlot &slot) { + return getAddr() == slot.ptr; +} + +Value cir::StoreOp::getStored(const MemorySlot &slot, OpBuilder &builder, + Value reachingDef, const DataLayout &dataLayout) { + return getValue(); +} + +bool cir::StoreOp::canUsesBeRemoved( + const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, + SmallVectorImpl &newBlockingUses, + const DataLayout &dataLayout) { + if (blockingUses.size() != 1) + return false; + Value blockingUse = (*blockingUses.begin())->get(); + return blockingUse == slot.ptr && getAddr() == slot.ptr && + getValue() != slot.ptr && slot.elemType == getValue().getType(); +} + +DeletionKind cir::StoreOp::removeBlockingUses( + const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, + OpBuilder &builder, Value reachingDefinition, + const DataLayout 
&dataLayout) { + return DeletionKind::Delete; +} diff --git a/clang/lib/CIR/Dialect/IR/CMakeLists.txt b/clang/lib/CIR/Dialect/IR/CMakeLists.txt index 925af0d61c984..e3a6fc6e80ecc 100644 --- a/clang/lib/CIR/Dialect/IR/CMakeLists.txt +++ b/clang/lib/CIR/Dialect/IR/CMakeLists.txt @@ -6,7 +6,8 @@ add_clang_library(MLIRCIR DEPENDS MLIRCIROpsIncGen - MLIRCIRAttrsEnumsGen + MLIRCIREnumsGen + MLIRCIROpInterfacesIncGen LINK_LIBS PUBLIC MLIRIR diff --git a/clang/lib/CIR/FrontendAction/CMakeLists.txt b/clang/lib/CIR/FrontendAction/CMakeLists.txt index ac2b857239d07..6d5a8758468f6 100644 --- a/clang/lib/CIR/FrontendAction/CMakeLists.txt +++ b/clang/lib/CIR/FrontendAction/CMakeLists.txt @@ -8,6 +8,10 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) add_clang_library(clangCIRFrontendAction CIRGenAction.cpp + DEPENDS + MLIRCIROpsIncGen + MLIRCIROpInterfacesIncGen + LINK_LIBS clangAST clangFrontend diff --git a/clang/lib/CIR/Interfaces/CIROpInterfaces.cpp b/clang/lib/CIR/Interfaces/CIROpInterfaces.cpp new file mode 100644 index 0000000000000..3ae103c2e65b4 --- /dev/null +++ b/clang/lib/CIR/Interfaces/CIROpInterfaces.cpp @@ -0,0 +1,26 @@ +//====- CIROpInterfaces.cpp - Interface to AST Attributes ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the interface to CIR operations. +// +//===----------------------------------------------------------------------===// +#include "clang/CIR/Interfaces/CIROpInterfaces.h" + +using namespace cir; + +/// Include the generated type qualifiers interfaces. 
+#include "clang/CIR/Interfaces/CIROpInterfaces.cpp.inc" + +#include "clang/CIR/MissingFeatures.h" + +bool CIRGlobalValueInterface::canBenefitFromLocalAlias() { + assert(!cir::MissingFeatures::supportIFuncAttr()); + assert(!cir::MissingFeatures::supportVisibility()); + assert(!cir::MissingFeatures::supportComdat()); + return false; +} diff --git a/clang/lib/CIR/Interfaces/CMakeLists.txt b/clang/lib/CIR/Interfaces/CMakeLists.txt index b826bf612cc35..2fe5714520b74 100644 --- a/clang/lib/CIR/Interfaces/CMakeLists.txt +++ b/clang/lib/CIR/Interfaces/CMakeLists.txt @@ -1,12 +1,14 @@ add_clang_library(MLIRCIRInterfaces + CIROpInterfaces.cpp CIRFPTypeInterface.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Interfaces DEPENDS - MLIRCIRAttrsEnumsGen + MLIRCIREnumsGen MLIRCIRFPTypeInterfaceIncGen + MLIRCIROpInterfacesIncGen LINK_LIBS ${dialect_libs} diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt b/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt index 3f74c79249a27..c11ecb82183d0 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt +++ b/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt @@ -8,6 +8,11 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) add_clang_library(clangCIRLoweringDirectToLLVM LowerToLLVM.cpp + DEPENDS + MLIRCIREnumsGen + MLIRCIROpsIncGen + MLIRCIROpInterfacesIncGen + LINK_LIBS MLIRIR ${dialect_libs} diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index ba7fab2865116..3200527bd03af 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -24,11 +24,12 @@ #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Export.h" #include "mlir/Transforms/DialectConversion.h" -#include "clang/CIR/Dialect/IR/CIRAttrVisitor.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/MissingFeatures.h" #include 
"clang/CIR/Passes.h" +#include "llvm/ADT/TypeSwitch.h" #include "llvm/IR/Module.h" +#include "llvm/Support/Error.h" #include "llvm/Support/TimeProfiler.h" using namespace cir; @@ -37,41 +38,122 @@ using namespace llvm; namespace cir { namespace direct { -class CIRAttrToValue : public CirAttrVisitor { +/// Given a type convertor and a data layout, convert the given type to a type +/// that is suitable for memory operations. For example, this can be used to +/// lower cir.bool accesses to i8. +static mlir::Type convertTypeForMemory(const mlir::TypeConverter &converter, + mlir::DataLayout const &dataLayout, + mlir::Type type) { + // TODO(cir): Handle other types similarly to clang's codegen + // convertTypeForMemory + if (isa(type)) { + return mlir::IntegerType::get(type.getContext(), + dataLayout.getTypeSizeInBits(type)); + } + + return converter.convertType(type); +} + +static mlir::Value createIntCast(mlir::OpBuilder &bld, mlir::Value src, + mlir::IntegerType dstTy, + bool isSigned = false) { + mlir::Type srcTy = src.getType(); + assert(mlir::isa(srcTy)); + + unsigned srcWidth = mlir::cast(srcTy).getWidth(); + unsigned dstWidth = mlir::cast(dstTy).getWidth(); + mlir::Location loc = src.getLoc(); + + if (dstWidth > srcWidth && isSigned) + return bld.create(loc, dstTy, src); + else if (dstWidth > srcWidth) + return bld.create(loc, dstTy, src); + else if (dstWidth < srcWidth) + return bld.create(loc, dstTy, src); + else + return bld.create(loc, dstTy, src); +} + +/// Emits the value from memory as expected by its users. Should be called when +/// the memory representation of a CIR type is not equal to its scalar +/// representation.
+static mlir::Value emitFromMemory(mlir::ConversionPatternRewriter &rewriter, + mlir::DataLayout const &dataLayout, + cir::LoadOp op, mlir::Value value) { + + // TODO(cir): Handle other types similarly to clang's codegen EmitFromMemory + if (auto boolTy = mlir::dyn_cast(op.getResult().getType())) { + // Create a cast value from specified size in datalayout to i1 + assert(value.getType().isInteger(dataLayout.getTypeSizeInBits(boolTy))); + return createIntCast(rewriter, value, rewriter.getI1Type()); + } + + return value; +} + +/// Emits a value to memory with the expected scalar type. Should be called when +/// the memory representation of a CIR type is not equal to its scalar +/// representation. +static mlir::Value emitToMemory(mlir::ConversionPatternRewriter &rewriter, + mlir::DataLayout const &dataLayout, + mlir::Type origType, mlir::Value value) { + + // TODO(cir): Handle other types similarly to clang's codegen EmitToMemory + if (auto boolTy = mlir::dyn_cast(origType)) { + // Create zext of value from i1 to i8 + mlir::IntegerType memType = + rewriter.getIntegerType(dataLayout.getTypeSizeInBits(boolTy)); + return createIntCast(rewriter, value, memType); + } + + return value; +} + +mlir::LLVM::Linkage convertLinkage(cir::GlobalLinkageKind linkage) { + using CIR = cir::GlobalLinkageKind; + using LLVM = mlir::LLVM::Linkage; + + switch (linkage) { + case CIR::AvailableExternallyLinkage: + return LLVM::AvailableExternally; + case CIR::CommonLinkage: + return LLVM::Common; + case CIR::ExternalLinkage: + return LLVM::External; + case CIR::ExternalWeakLinkage: + return LLVM::ExternWeak; + case CIR::InternalLinkage: + return LLVM::Internal; + case CIR::LinkOnceAnyLinkage: + return LLVM::Linkonce; + case CIR::LinkOnceODRLinkage: + return LLVM::LinkonceODR; + case CIR::PrivateLinkage: + return LLVM::Private; + case CIR::WeakAnyLinkage: + return LLVM::Weak; + case CIR::WeakODRLinkage: + return LLVM::WeakODR; + }; +} + +class CIRAttrToValue { public:
CIRAttrToValue(mlir::Operation *parentOp, mlir::ConversionPatternRewriter &rewriter, const mlir::TypeConverter *converter) : parentOp(parentOp), rewriter(rewriter), converter(converter) {} - mlir::Value lowerCirAttrAsValue(mlir::Attribute attr) { return visit(attr); } - - mlir::Value visitCirIntAttr(cir::IntAttr intAttr) { - mlir::Location loc = parentOp->getLoc(); - return rewriter.create( - loc, converter->convertType(intAttr.getType()), intAttr.getValue()); + mlir::Value visit(mlir::Attribute attr) { + return llvm::TypeSwitch(attr) + .Case( + [&](auto attrT) { return visitCirAttr(attrT); }) + .Default([&](auto attrT) { return mlir::Value(); }); } - mlir::Value visitCirFPAttr(cir::FPAttr fltAttr) { - mlir::Location loc = parentOp->getLoc(); - return rewriter.create( - loc, converter->convertType(fltAttr.getType()), fltAttr.getValue()); - } - - mlir::Value visitCirConstPtrAttr(cir::ConstPtrAttr ptrAttr) { - mlir::Location loc = parentOp->getLoc(); - if (ptrAttr.isNullValue()) { - return rewriter.create( - loc, converter->convertType(ptrAttr.getType())); - } - mlir::DataLayout layout(parentOp->getParentOfType()); - mlir::Value ptrVal = rewriter.create( - loc, - rewriter.getIntegerType(layout.getTypeSizeInBits(ptrAttr.getType())), - ptrAttr.getValue().getInt()); - return rewriter.create( - loc, converter->convertType(ptrAttr.getType()), ptrVal); - } + mlir::Value visitCirAttr(cir::IntAttr intAttr); + mlir::Value visitCirAttr(cir::FPAttr fltAttr); + mlir::Value visitCirAttr(cir::ConstPtrAttr ptrAttr); private: mlir::Operation *parentOp; @@ -79,23 +161,59 @@ class CIRAttrToValue : public CirAttrVisitor { const mlir::TypeConverter *converter; }; +/// IntAttr visitor. +mlir::Value CIRAttrToValue::visitCirAttr(cir::IntAttr intAttr) { + mlir::Location loc = parentOp->getLoc(); + return rewriter.create( + loc, converter->convertType(intAttr.getType()), intAttr.getValue()); +} + +/// ConstPtrAttr visitor. 
+mlir::Value CIRAttrToValue::visitCirAttr(cir::ConstPtrAttr ptrAttr) { + mlir::Location loc = parentOp->getLoc(); + if (ptrAttr.isNullValue()) { + return rewriter.create( + loc, converter->convertType(ptrAttr.getType())); + } + mlir::DataLayout layout(parentOp->getParentOfType()); + mlir::Value ptrVal = rewriter.create( + loc, rewriter.getIntegerType(layout.getTypeSizeInBits(ptrAttr.getType())), + ptrAttr.getValue().getInt()); + return rewriter.create( + loc, converter->convertType(ptrAttr.getType()), ptrVal); +} + +/// FPAttr visitor. +mlir::Value CIRAttrToValue::visitCirAttr(cir::FPAttr fltAttr) { + mlir::Location loc = parentOp->getLoc(); + return rewriter.create( + loc, converter->convertType(fltAttr.getType()), fltAttr.getValue()); +} + // This class handles rewriting initializer attributes for types that do not // require region initialization. -class GlobalInitAttrRewriter - : public CirAttrVisitor { +class GlobalInitAttrRewriter { public: GlobalInitAttrRewriter(mlir::Type type, mlir::ConversionPatternRewriter &rewriter) : llvmType(type), rewriter(rewriter) {} - mlir::Attribute rewriteInitAttr(mlir::Attribute attr) { return visit(attr); } + mlir::Attribute visit(mlir::Attribute attr) { + return llvm::TypeSwitch(attr) + .Case( + [&](auto attrT) { return visitCirAttr(attrT); }) + .Default([&](auto attrT) { return mlir::Attribute(); }); + } - mlir::Attribute visitCirIntAttr(cir::IntAttr attr) { + mlir::Attribute visitCirAttr(cir::IntAttr attr) { return rewriter.getIntegerAttr(llvmType, attr.getValue()); } - mlir::Attribute visitCirFPAttr(cir::FPAttr attr) { + mlir::Attribute visitCirAttr(cir::FPAttr attr) { return rewriter.getFloatAttr(llvmType, attr.getValue()); } + mlir::Attribute visitCirAttr(cir::BoolAttr attr) { + return rewriter.getBoolAttr(attr.getValue()); + } private: mlir::Type llvmType; @@ -124,18 +242,219 @@ struct ConvertCIRToLLVMPass StringRef getArgument() const override { return "cir-flat-to-llvm"; } }; -bool 
CIRToLLVMGlobalOpLowering::attrRequiresRegionInitialization( - mlir::Attribute attr) const { - // There will be more cases added later. - return isa(attr); +mlir::LogicalResult CIRToLLVMAllocaOpLowering::matchAndRewrite( + cir::AllocaOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + assert(!cir::MissingFeatures::opAllocaDynAllocSize()); + mlir::Value size = rewriter.create( + op.getLoc(), typeConverter->convertType(rewriter.getIndexType()), + rewriter.getIntegerAttr(rewriter.getIndexType(), 1)); + mlir::Type elementTy = + convertTypeForMemory(*getTypeConverter(), dataLayout, op.getAllocaType()); + mlir::Type resultTy = convertTypeForMemory(*getTypeConverter(), dataLayout, + op.getResult().getType()); + + assert(!cir::MissingFeatures::addressSpace()); + assert(!cir::MissingFeatures::opAllocaAnnotations()); + + rewriter.replaceOpWithNewOp( + op, resultTy, elementTy, size, op.getAlignmentAttr().getInt()); + + return mlir::success(); +} + +mlir::LogicalResult CIRToLLVMReturnOpLowering::matchAndRewrite( + cir::ReturnOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + rewriter.replaceOpWithNewOp(op, adaptor.getOperands()); + return mlir::LogicalResult::success(); +} + +mlir::LogicalResult CIRToLLVMLoadOpLowering::matchAndRewrite( + cir::LoadOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + const mlir::Type llvmTy = convertTypeForMemory( + *getTypeConverter(), dataLayout, op.getResult().getType()); + assert(!cir::MissingFeatures::opLoadStoreMemOrder()); + assert(!cir::MissingFeatures::opLoadStoreAlignment()); + unsigned alignment = (unsigned)dataLayout.getTypeABIAlignment(llvmTy); + + assert(!cir::MissingFeatures::lowerModeOptLevel()); + + // TODO: nontemporal, syncscope. 
+ assert(!cir::MissingFeatures::opLoadStoreVolatile()); + mlir::LLVM::LoadOp newLoad = rewriter.create( + op->getLoc(), llvmTy, adaptor.getAddr(), alignment, + /*volatile=*/false, /*nontemporal=*/false, + /*invariant=*/false, /*invariantGroup=*/false, + mlir::LLVM::AtomicOrdering::not_atomic); + + // Convert adapted result to its original type if needed. + mlir::Value result = + emitFromMemory(rewriter, dataLayout, op, newLoad.getResult()); + rewriter.replaceOp(op, result); + assert(!cir::MissingFeatures::opLoadStoreTbaa()); + return mlir::LogicalResult::success(); +} + +mlir::LogicalResult CIRToLLVMStoreOpLowering::matchAndRewrite( + cir::StoreOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + assert(!cir::MissingFeatures::opLoadStoreMemOrder()); + assert(!cir::MissingFeatures::opLoadStoreAlignment()); + const mlir::Type llvmTy = + getTypeConverter()->convertType(op.getValue().getType()); + unsigned alignment = (unsigned)dataLayout.getTypeABIAlignment(llvmTy); + + assert(!cir::MissingFeatures::lowerModeOptLevel()); + + // Convert adapted value to its memory type if needed. + mlir::Value value = emitToMemory(rewriter, dataLayout, + op.getValue().getType(), adaptor.getValue()); + // TODO: nontemporal, syncscope. 
+ assert(!cir::MissingFeatures::opLoadStoreVolatile()); + mlir::LLVM::StoreOp storeOp = rewriter.create( + op->getLoc(), value, adaptor.getAddr(), alignment, /*volatile=*/false, + /*nontemporal=*/false, /*invariantGroup=*/false, + mlir::LLVM::AtomicOrdering::not_atomic); + rewriter.replaceOp(op, storeOp); + assert(!cir::MissingFeatures::opLoadStoreTbaa()); + return mlir::LogicalResult::success(); +} + +mlir::LogicalResult CIRToLLVMConstantOpLowering::matchAndRewrite( + cir::ConstantOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + mlir::Attribute attr = op.getValue(); + + if (mlir::isa(op.getType())) { + // Verified cir.const operations cannot actually be of these types, but the + // lowering pass may generate temporary cir.const operations with these + // types. This is OK since MLIR allows unverified operations to be alive + // during a pass as long as they don't live past the end of the pass. + attr = op.getValue(); + } else if (mlir::isa(op.getType())) { + int value = (op.getValue() == + cir::BoolAttr::get(getContext(), + cir::BoolType::get(getContext()), true)); + attr = rewriter.getIntegerAttr(typeConverter->convertType(op.getType()), + value); + } else if (mlir::isa(op.getType())) { + assert(!cir::MissingFeatures::opGlobalViewAttr()); + + attr = rewriter.getIntegerAttr( + typeConverter->convertType(op.getType()), + mlir::cast(op.getValue()).getValue()); + } else if (mlir::isa(op.getType())) { + attr = rewriter.getFloatAttr( + typeConverter->convertType(op.getType()), + mlir::cast(op.getValue()).getValue()); + } else if (mlir::isa(op.getType())) { + // Optimize with dedicated LLVM op for null pointers. 
+ if (mlir::isa(op.getValue())) { + if (mlir::cast(op.getValue()).isNullValue()) { + rewriter.replaceOpWithNewOp( + op, typeConverter->convertType(op.getType())); + return mlir::success(); + } + } + assert(!cir::MissingFeatures::opGlobalViewAttr()); + attr = op.getValue(); + } else { + return op.emitError() << "unsupported constant type " << op.getType(); + } + + rewriter.replaceOpWithNewOp( + op, getTypeConverter()->convertType(op.getType()), attr); + + return mlir::success(); +} + +/// Convert the `cir.func` attributes to `llvm.func` attributes. +/// Only retain those attributes that are not constructed by +/// `LLVMFuncOp::build`. If `filterArgAttrs` is set, also filter out +/// argument attributes. +void CIRToLLVMFuncOpLowering::lowerFuncAttributes( + cir::FuncOp func, bool filterArgAndResAttrs, + SmallVectorImpl &result) const { + assert(!cir::MissingFeatures::opFuncCallingConv()); + for (mlir::NamedAttribute attr : func->getAttrs()) { + if (attr.getName() == mlir::SymbolTable::getSymbolAttrName() || + attr.getName() == func.getFunctionTypeAttrName() || + attr.getName() == getLinkageAttrNameString() || + (filterArgAndResAttrs && + (attr.getName() == func.getArgAttrsAttrName() || + attr.getName() == func.getResAttrsAttrName()))) + continue; + + assert(!cir::MissingFeatures::opFuncExtraAttrs()); + result.push_back(attr); + } +} + +mlir::LogicalResult CIRToLLVMFuncOpLowering::matchAndRewrite( + cir::FuncOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + + cir::FuncType fnType = op.getFunctionType(); + assert(!cir::MissingFeatures::opFuncDsolocal()); + bool isDsoLocal = false; + mlir::TypeConverter::SignatureConversion signatureConversion( + fnType.getNumInputs()); + + for (const auto &argType : llvm::enumerate(fnType.getInputs())) { + mlir::Type convertedType = typeConverter->convertType(argType.value()); + if (!convertedType) + return mlir::failure(); + signatureConversion.addInputs(argType.index(), convertedType); + } + + 
mlir::Type resultType = + getTypeConverter()->convertType(fnType.getReturnType()); + + // Create the LLVM function operation. + mlir::Type llvmFnTy = mlir::LLVM::LLVMFunctionType::get( + resultType ? resultType : mlir::LLVM::LLVMVoidType::get(getContext()), + signatureConversion.getConvertedTypes(), + /*isVarArg=*/fnType.isVarArg()); + // LLVMFuncOp expects a single FileLine Location instead of a fused + // location. + mlir::Location loc = op.getLoc(); + if (mlir::FusedLoc fusedLoc = mlir::dyn_cast(loc)) + loc = fusedLoc.getLocations()[0]; + assert((mlir::isa(loc) || + mlir::isa(loc)) && + "expected single location or unknown location here"); + + assert(!cir::MissingFeatures::opFuncLinkage()); + mlir::LLVM::Linkage linkage = mlir::LLVM::Linkage::External; + assert(!cir::MissingFeatures::opFuncCallingConv()); + mlir::LLVM::CConv cconv = mlir::LLVM::CConv::C; + SmallVector attributes; + lowerFuncAttributes(op, /*filterArgAndResAttrs=*/false, attributes); + + mlir::LLVM::LLVMFuncOp fn = rewriter.create( + loc, op.getName(), llvmFnTy, linkage, isDsoLocal, cconv, + mlir::SymbolRefAttr(), attributes); + + assert(!cir::MissingFeatures::opFuncVisibility()); + + rewriter.inlineRegionBefore(op.getBody(), fn.getBody(), fn.end()); + if (failed(rewriter.convertRegionTypes(&fn.getBody(), *typeConverter, + &signatureConversion))) + return mlir::failure(); + + rewriter.eraseOp(op); + + return mlir::LogicalResult::success(); } /// Replace CIR global with a region initialized LLVM global and update /// insertion point to the end of the initializer block. 
void CIRToLLVMGlobalOpLowering::setupRegionInitializedLLVMGlobalOp( cir::GlobalOp op, mlir::ConversionPatternRewriter &rewriter) const { - assert(!cir::MissingFeatures::convertTypeForMemory()); - const mlir::Type llvmType = getTypeConverter()->convertType(op.getSymType()); + const mlir::Type llvmType = + convertTypeForMemory(*getTypeConverter(), dataLayout, op.getSymType()); // FIXME: These default values are placeholders until the the equivalent // attributes are available on cir.global ops. This duplicates code @@ -151,15 +470,15 @@ void CIRToLLVMGlobalOpLowering::setupRegionInitializedLLVMGlobalOp( const bool isThreadLocal = false; assert(!cir::MissingFeatures::opGlobalAlignment()); const uint64_t alignment = 0; - assert(!cir::MissingFeatures::opGlobalLinkage()); - const mlir::LLVM::Linkage linkage = mlir::LLVM::Linkage::External; + const mlir::LLVM::Linkage linkage = convertLinkage(op.getLinkage()); const StringRef symbol = op.getSymName(); SmallVector attributes; - auto newGlobalOp = rewriter.replaceOpWithNewOp( - op, llvmType, isConst, linkage, symbol, nullptr, alignment, addrSpace, - isDsoLocal, isThreadLocal, - /*comdat=*/mlir::SymbolRefAttr(), attributes); + mlir::LLVM::GlobalOp newGlobalOp = + rewriter.replaceOpWithNewOp( + op, llvmType, isConst, linkage, symbol, nullptr, alignment, addrSpace, + isDsoLocal, isThreadLocal, + /*comdat=*/mlir::SymbolRefAttr(), attributes); newGlobalOp.getRegion().emplaceBlock(); rewriter.setInsertionPointToEnd(newGlobalOp.getInitializerBlock()); } @@ -176,8 +495,8 @@ CIRToLLVMGlobalOpLowering::matchAndRewriteRegionInitializedGlobal( // to the appropriate value. 
const mlir::Location loc = op.getLoc(); setupRegionInitializedLLVMGlobalOp(op, rewriter); - CIRAttrToValue attrVisitor(op, rewriter, typeConverter); - mlir::Value value = attrVisitor.lowerCirAttrAsValue(init); + CIRAttrToValue valueConverter(op, rewriter, typeConverter); + mlir::Value value = valueConverter.visit(init); rewriter.create(loc, value); return mlir::success(); } @@ -188,18 +507,12 @@ mlir::LogicalResult CIRToLLVMGlobalOpLowering::matchAndRewrite( std::optional init = op.getInitialValue(); - // If we have an initializer and it requires region initialization, handle - // that separately - if (init.has_value() && attrRequiresRegionInitialization(init.value())) { - return matchAndRewriteRegionInitializedGlobal(op, init.value(), rewriter); - } - // Fetch required values to create LLVM op. const mlir::Type cirSymType = op.getSymType(); // This is the LLVM dialect type. - assert(!cir::MissingFeatures::convertTypeForMemory()); - const mlir::Type llvmType = getTypeConverter()->convertType(cirSymType); + const mlir::Type llvmType = + convertTypeForMemory(*getTypeConverter(), dataLayout, cirSymType); // FIXME: These default values are placeholders until the the equivalent // attributes are available on cir.global ops. assert(!cir::MissingFeatures::opGlobalConstant()); @@ -212,18 +525,30 @@ mlir::LogicalResult CIRToLLVMGlobalOpLowering::matchAndRewrite( const bool isThreadLocal = false; assert(!cir::MissingFeatures::opGlobalAlignment()); const uint64_t alignment = 0; - assert(!cir::MissingFeatures::opGlobalLinkage()); - const mlir::LLVM::Linkage linkage = mlir::LLVM::Linkage::External; + const mlir::LLVM::Linkage linkage = convertLinkage(op.getLinkage()); const StringRef symbol = op.getSymName(); SmallVector attributes; if (init.has_value()) { - GlobalInitAttrRewriter initRewriter(llvmType, rewriter); - init = initRewriter.rewriteInitAttr(init.value()); - // If initRewriter returned a null attribute, init will have a value but - // the value will be null. 
If that happens, initRewriter didn't handle the - // attribute type. It probably needs to be added to GlobalInitAttrRewriter. - if (!init.value()) { + if (mlir::isa(init.value())) { + GlobalInitAttrRewriter initRewriter(llvmType, rewriter); + init = initRewriter.visit(init.value()); + // If initRewriter returned a null attribute, init will have a value but + // the value will be null. If that happens, initRewriter didn't handle the + // attribute type. It probably needs to be added to + // GlobalInitAttrRewriter. + if (!init.value()) { + op.emitError() << "unsupported initializer '" << init.value() << "'"; + return mlir::failure(); + } + } else if (mlir::isa(init.value())) { + // TODO(cir): once LLVM's dialect has proper equivalent attributes this + // should be updated. For now, we use a custom op to initialize globals + // to the appropriate value. + return matchAndRewriteRegionInitializedGlobal(op, init.value(), rewriter); + } else { + // We will only get here if new initializer types are added and this + // code is not updated to handle them. op.emitError() << "unsupported initializer '" << init.value() << "'"; return mlir::failure(); } @@ -247,6 +572,10 @@ static void prepareTypeConverter(mlir::LLVMTypeConverter &converter, return mlir::LLVM::LLVMPointerType::get(type.getContext(), targetAS); }); + converter.addConversion([&](cir::BoolType type) -> mlir::Type { + return mlir::IntegerType::get(type.getContext(), 1, + mlir::IntegerType::Signless); + }); converter.addConversion([&](cir::IntType type) -> mlir::Type { // LLVM doesn't work with signed types, so we drop the CIR signs here. return mlir::IntegerType::get(type.getContext(), type.getWidth()); @@ -276,7 +605,8 @@ static void prepareTypeConverter(mlir::LLVMTypeConverter &converter, void ConvertCIRToLLVMPass::processCIRAttrs(mlir::ModuleOp module) { // Lower the module attributes to LLVM equivalents. 
- if (auto tripleAttr = module->getAttr(cir::CIRDialect::getTripleAttrName())) + if (mlir::Attribute tripleAttr = + module->getAttr(cir::CIRDialect::getTripleAttrName())) module->setAttr(mlir::LLVM::LLVMDialect::getTargetTripleAttrName(), tripleAttr); } @@ -291,7 +621,16 @@ void ConvertCIRToLLVMPass::runOnOperation() { mlir::RewritePatternSet patterns(&getContext()); + patterns.add(patterns.getContext()); + // This could currently be merged with the group below, but it will get more + // arguments later, so we'll keep it separate for now. + patterns.add(converter, patterns.getContext(), dl); + patterns.add(converter, patterns.getContext(), dl); + patterns.add(converter, patterns.getContext(), dl); patterns.add(converter, patterns.getContext(), dl); + patterns.add(converter, patterns.getContext(), + dl); + patterns.add(converter, patterns.getContext()); processCIRAttrs(module); @@ -301,8 +640,9 @@ void ConvertCIRToLLVMPass::runOnOperation() { target.addIllegalDialect(); - if (failed(applyPartialConversion(module, target, std::move(patterns)))) + if (failed(applyPartialConversion(module, target, std::move(patterns)))) { signalPassFailure(); + } } std::unique_ptr createConvertCIRToLLVMPass() { diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h index b3366c1fb9337..a694047e3616b 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h @@ -12,6 +12,7 @@ #ifndef CLANG_CIR_LOWERTOLLVM_H #define CLANG_CIR_LOWERTOLLVM_H +#include "mlir/Dialect/LLVMIR/LLVMAttrs.h" #include "mlir/Transforms/DialectConversion.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" @@ -19,6 +20,97 @@ namespace cir { namespace direct { +mlir::LLVM::Linkage convertLinkage(cir::GlobalLinkageKind linkage); + +class CIRToLLVMReturnOpLowering + : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + 
matchAndRewrite(cir::ReturnOp op, OpAdaptor, + mlir::ConversionPatternRewriter &) const override; +}; + +class CIRToLLVMAllocaOpLowering + : public mlir::OpConversionPattern { + mlir::DataLayout const &dataLayout; + +public: + CIRToLLVMAllocaOpLowering(mlir::TypeConverter const &typeConverter, + mlir::MLIRContext *context, + mlir::DataLayout const &dataLayout) + : OpConversionPattern(typeConverter, context), + dataLayout(dataLayout) {} + + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::AllocaOp op, OpAdaptor, + mlir::ConversionPatternRewriter &) const override; +}; + +class CIRToLLVMLoadOpLowering : public mlir::OpConversionPattern { + mlir::DataLayout const &dataLayout; + +public: + CIRToLLVMLoadOpLowering(const mlir::TypeConverter &typeConverter, + mlir::MLIRContext *context, + mlir::DataLayout const &dataLayout) + : OpConversionPattern(typeConverter, context), dataLayout(dataLayout) {} + + mlir::LogicalResult + matchAndRewrite(cir::LoadOp op, OpAdaptor, + mlir::ConversionPatternRewriter &) const override; +}; + +class CIRToLLVMStoreOpLowering + : public mlir::OpConversionPattern { + mlir::DataLayout const &dataLayout; + +public: + CIRToLLVMStoreOpLowering(const mlir::TypeConverter &typeConverter, + mlir::MLIRContext *context, + mlir::DataLayout const &dataLayout) + : OpConversionPattern(typeConverter, context), dataLayout(dataLayout) {} + + mlir::LogicalResult + matchAndRewrite(cir::StoreOp op, OpAdaptor, + mlir::ConversionPatternRewriter &) const override; +}; + +class CIRToLLVMConstantOpLowering + : public mlir::OpConversionPattern { + mlir::DataLayout const &dataLayout; + +public: + CIRToLLVMConstantOpLowering(const mlir::TypeConverter &typeConverter, + mlir::MLIRContext *context, + mlir::DataLayout const &dataLayout) + : OpConversionPattern(typeConverter, context), dataLayout(dataLayout) { + setHasBoundedRewriteRecursion(); + } + + mlir::LogicalResult + matchAndRewrite(cir::ConstantOp op, OpAdaptor, + 
mlir::ConversionPatternRewriter &) const override; +}; + +class CIRToLLVMFuncOpLowering : public mlir::OpConversionPattern { + static mlir::StringRef getLinkageAttrNameString() { return "linkage"; } + + void lowerFuncAttributes( + cir::FuncOp func, bool filterArgAndResAttrs, + mlir::SmallVectorImpl &result) const; + +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::FuncOp op, OpAdaptor, + mlir::ConversionPatternRewriter &) const override; +}; + class CIRToLLVMGlobalOpLowering : public mlir::OpConversionPattern { const mlir::DataLayout &dataLayout; @@ -36,8 +128,6 @@ class CIRToLLVMGlobalOpLowering mlir::ConversionPatternRewriter &rewriter) const override; private: - bool attrRequiresRegionInitialization(mlir::Attribute attr) const; - mlir::LogicalResult matchAndRewriteRegionInitializedGlobal( cir::GlobalOp op, mlir::Attribute init, mlir::ConversionPatternRewriter &rewriter) const; diff --git a/clang/lib/CodeGen/ABIInfoImpl.cpp b/clang/lib/CodeGen/ABIInfoImpl.cpp index 68887cd7916c7..0a612d3461dc2 100644 --- a/clang/lib/CodeGen/ABIInfoImpl.cpp +++ b/clang/lib/CodeGen/ABIInfoImpl.cpp @@ -430,7 +430,7 @@ Address CodeGen::EmitVAArgInstr(CodeGenFunction &CGF, Address VAListAddr, CharUnits TyAlignForABI = TyInfo.Align; llvm::Type *ElementTy = CGF.ConvertTypeForMem(Ty); - llvm::Type *BaseTy = llvm::PointerType::getUnqual(ElementTy); + llvm::Type *BaseTy = llvm::PointerType::getUnqual(CGF.getLLVMContext()); llvm::Value *Addr = CGF.Builder.CreateVAArg(VAListAddr.emitRawPointer(CGF), BaseTy); return Address(Addr, ElementTy, TyAlignForABI); diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 1750719e17670..62a0e3c69bad1 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -595,7 +595,7 @@ static void setCommandLineOpts(const CodeGenOptions &CodeGenOpts) { void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { // Create the 
TargetMachine for generating code. std::string Error; - std::string Triple = TheModule->getTargetTriple(); + std::string Triple = TheModule->getTargetTriple().str(); const llvm::Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error); if (!TheTarget) { if (MustCreateTM) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 65fac01d58362..b86bb242755be 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -859,6 +859,24 @@ static void emitSincosBuiltin(CodeGenFunction &CGF, const CallExpr *E, StoreCos->setMetadata(LLVMContext::MD_noalias, AliasScopeList); } +static llvm::Value *emitModfBuiltin(CodeGenFunction &CGF, const CallExpr *E, + llvm::Intrinsic::ID IntrinsicID) { + llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *IntPartDest = CGF.EmitScalarExpr(E->getArg(1)); + + llvm::Value *Call = + CGF.Builder.CreateIntrinsic(IntrinsicID, {Val->getType()}, Val); + + llvm::Value *FractionalResult = CGF.Builder.CreateExtractValue(Call, 0); + llvm::Value *IntegralResult = CGF.Builder.CreateExtractValue(Call, 1); + + QualType DestPtrType = E->getArg(1)->getType()->getPointeeType(); + LValue IntegralLV = CGF.MakeNaturalAlignAddrLValue(IntPartDest, DestPtrType); + CGF.EmitStoreOfScalar(IntegralResult, IntegralLV); + + return FractionalResult; +} + /// EmitFAbs - Emit a call to @llvm.fabs(). 
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); @@ -4120,6 +4138,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_frexpf128: case Builtin::BI__builtin_frexpf16: return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp)); + case Builtin::BImodf: + case Builtin::BImodff: + case Builtin::BImodfl: + case Builtin::BI__builtin_modf: + case Builtin::BI__builtin_modff: + case Builtin::BI__builtin_modfl: + if (Builder.getIsFPConstrained()) + break; // TODO: Emit constrained modf intrinsic once one exists. + return RValue::get(emitModfBuiltin(*this, E, Intrinsic::modf)); case Builtin::BI__builtin_isgreater: case Builtin::BI__builtin_isgreaterequal: case Builtin::BI__builtin_isless: @@ -4378,7 +4405,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, : llvm::Intrinsic::umax, Op0, Op1, nullptr, "elt.max"); } else - Result = Builder.CreateMaxNum(Op0, Op1, "elt.max"); + Result = Builder.CreateMaxNum(Op0, Op1, /*FMFSource=*/nullptr, "elt.max"); return RValue::get(Result); } case Builtin::BI__builtin_elementwise_min: { @@ -4394,7 +4421,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, : llvm::Intrinsic::umin, Op0, Op1, nullptr, "elt.min"); } else - Result = Builder.CreateMinNum(Op0, Op1, "elt.min"); + Result = Builder.CreateMinNum(Op0, Op1, /*FMFSource=*/nullptr, "elt.min"); return RValue::get(Result); } @@ -6205,13 +6232,25 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); LValue NDRangeL = EmitAggExprToLValue(E->getArg(2)); llvm::Value *Range = NDRangeL.getAddress().emitRawPointer(*this); - llvm::Type *RangeTy = NDRangeL.getAddress().getType(); + + // FIXME: Look through the addrspacecast which may exist to the stack + // temporary as a hack. 
+ // + // This is hardcoding the assumed ABI of the target function. This assumes + // direct passing for every argument except NDRange, which is assumed to be + // byval or byref indirect passed. + // + // This should be fixed to query a signature from CGOpenCLRuntime, and go + // through EmitCallArgs to get the correct target ABI. + Range = Range->stripPointerCasts(); + + llvm::Type *RangePtrTy = Range->getType(); if (NumArgs == 4) { // The most basic form of the call with parameters: // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) Name = "__enqueue_kernel_basic"; - llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy, + llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangePtrTy, GenericVoidPtrTy, GenericVoidPtrTy}; llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef(ArgTys), false); @@ -6286,7 +6325,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Block, ConstantInt::get(IntTy, NumArgs - 4), ElemPtr}; llvm::Type *const ArgTys[] = { - QueueTy, IntTy, RangeTy, GenericVoidPtrTy, + QueueTy, IntTy, RangePtrTy, GenericVoidPtrTy, GenericVoidPtrTy, IntTy, ElemPtr->getType()}; llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false); @@ -6337,7 +6376,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); std::vector ArgTys = { - QueueTy, Int32Ty, RangeTy, Int32Ty, + QueueTy, Int32Ty, RangePtrTy, Int32Ty, PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy}; std::vector Args = {Queue, Flags, Range, @@ -10223,6 +10262,7 @@ llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) { default: llvm_unreachable("Invalid SVETypeFlag!"); + case SVETypeFlags::EltTyMFloat8: case SVETypeFlags::EltTyInt8: return Builder.getInt8Ty(); case SVETypeFlags::EltTyInt16: @@ -10651,7 +10691,7 @@ Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E, unsigned IntrinsicID, bool IsZExtReturn) { 
QualType LangPTy = E->getArg(1)->getType(); - llvm::Type *MemEltTy = CGM.getTypes().ConvertType( + llvm::Type *MemEltTy = CGM.getTypes().ConvertTypeForMem( LangPTy->castAs()->getPointeeType()); // The vector type that is returned may be different from the @@ -10698,7 +10738,7 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, SmallVectorImpl &Ops, unsigned IntrinsicID) { QualType LangPTy = E->getArg(1)->getType(); - llvm::Type *MemEltTy = CGM.getTypes().ConvertType( + llvm::Type *MemEltTy = CGM.getTypes().ConvertTypeForMem( LangPTy->castAs()->getPointeeType()); // The vector type that is stored may be different from the @@ -19469,6 +19509,62 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, return nullptr; switch (BuiltinID) { + case Builtin::BI__builtin_hlsl_adduint64: { + Value *OpA = EmitScalarExpr(E->getArg(0)); + Value *OpB = EmitScalarExpr(E->getArg(1)); + QualType Arg0Ty = E->getArg(0)->getType(); + uint64_t NumElements = Arg0Ty->castAs()->getNumElements(); + assert(Arg0Ty == E->getArg(1)->getType() && + "AddUint64 operand types must match"); + assert(Arg0Ty->hasIntegerRepresentation() && + "AddUint64 operands must have an integer representation"); + assert((NumElements == 2 || NumElements == 4) && + "AddUint64 operands must have 2 or 4 elements"); + + llvm::Value *LowA; + llvm::Value *HighA; + llvm::Value *LowB; + llvm::Value *HighB; + + // Obtain low and high words of inputs A and B + if (NumElements == 2) { + LowA = Builder.CreateExtractElement(OpA, (uint64_t)0, "LowA"); + HighA = Builder.CreateExtractElement(OpA, (uint64_t)1, "HighA"); + LowB = Builder.CreateExtractElement(OpB, (uint64_t)0, "LowB"); + HighB = Builder.CreateExtractElement(OpB, (uint64_t)1, "HighB"); + } else { + LowA = Builder.CreateShuffleVector(OpA, ArrayRef{0, 2}, "LowA"); + HighA = Builder.CreateShuffleVector(OpA, ArrayRef{1, 3}, "HighA"); + LowB = Builder.CreateShuffleVector(OpB, ArrayRef{0, 2}, "LowB"); + HighB = Builder.CreateShuffleVector(OpB, 
ArrayRef{1, 3}, "HighB"); + } + + // Use an uadd_with_overflow to compute the sum of low words and obtain a + // carry value + llvm::Value *Carry; + llvm::Value *LowSum = EmitOverflowIntrinsic( + *this, llvm::Intrinsic::uadd_with_overflow, LowA, LowB, Carry); + llvm::Value *ZExtCarry = + Builder.CreateZExt(Carry, HighA->getType(), "CarryZExt"); + + // Sum the high words and the carry + llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB, "HighSum"); + llvm::Value *HighSumPlusCarry = + Builder.CreateAdd(HighSum, ZExtCarry, "HighSumPlusCarry"); + + if (NumElements == 4) { + return Builder.CreateShuffleVector(LowSum, HighSumPlusCarry, + ArrayRef{0, 2, 1, 3}, + "hlsl.AddUint64"); + } + + llvm::Value *Result = PoisonValue::get(OpA->getType()); + Result = Builder.CreateInsertElement(Result, LowSum, (uint64_t)0, + "hlsl.AddUint64.upto0"); + Result = Builder.CreateInsertElement(Result, HighSumPlusCarry, (uint64_t)1, + "hlsl.AddUint64"); + return Result; + } case Builtin::BI__builtin_hlsl_resource_getpointer: { Value *HandleOp = EmitScalarExpr(E->getArg(0)); Value *IndexOp = EmitScalarExpr(E->getArg(1)); @@ -19492,6 +19588,11 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, Value *Op1 = EmitScalarExpr(E->getArg(1)); return Builder.CreateAnd(Op0, Op1, "hlsl.and"); } + case Builtin::BI__builtin_hlsl_or: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + return Builder.CreateOr(Op0, Op1, "hlsl.or"); + } case Builtin::BI__builtin_hlsl_any: { Value *Op0 = EmitScalarExpr(E->getArg(0)); return Builder.CreateIntrinsic( diff --git a/clang/lib/CodeGen/CGCXX.cpp b/clang/lib/CodeGen/CGCXX.cpp index 78a7b021855b7..6f47e24eed5b3 100644 --- a/clang/lib/CodeGen/CGCXX.cpp +++ b/clang/lib/CodeGen/CGCXX.cpp @@ -175,7 +175,6 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { // requires explicit comdat support in the IL. 
if (llvm::GlobalValue::isWeakForLinker(TargetLinkage)) return true; - // Create the alias with no name. auto *Alias = llvm::GlobalAlias::create(AliasValueType, 0, Linkage, "", Aliasee, &getModule()); @@ -201,6 +200,42 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { return false; } +/// Emit a definition as a global alias for another definition, unconditionally. +void CodeGenModule::EmitDefinitionAsAlias(GlobalDecl AliasDecl, + GlobalDecl TargetDecl) { + + llvm::Type *AliasValueType = getTypes().GetFunctionType(AliasDecl); + + StringRef MangledName = getMangledName(AliasDecl); + llvm::GlobalValue *Entry = GetGlobalValue(MangledName); + if (Entry && !Entry->isDeclaration()) + return; + auto *Aliasee = cast(GetAddrOfGlobal(TargetDecl)); + + // Determine the linkage type for the alias. + llvm::GlobalValue::LinkageTypes Linkage = getFunctionLinkage(AliasDecl); + + // Create the alias with no name. + auto *Alias = llvm::GlobalAlias::create(AliasValueType, 0, Linkage, "", + Aliasee, &getModule()); + // Destructors are always unnamed_addr. + Alias->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + + if (Entry) { + assert(Entry->getValueType() == AliasValueType && + Entry->getAddressSpace() == Alias->getAddressSpace() && + "declaration exists with different type"); + Alias->takeName(Entry); + Entry->replaceAllUsesWith(Alias); + Entry->eraseFromParent(); + } else { + Alias->setName(MangledName); + } + + // Set any additional necessary attributes for the alias. 
+ SetCommonAttributes(AliasDecl, Alias); +} + llvm::Function *CodeGenModule::codegenCXXStructor(GlobalDecl GD) { const CGFunctionInfo &FnInfo = getTypes().arrangeCXXStructorDeclaration(GD); auto *Fn = cast( diff --git a/clang/lib/CodeGen/CGCXXABI.cpp b/clang/lib/CodeGen/CGCXXABI.cpp index 7c6dfc3e59d8c..e109fd0c443f6 100644 --- a/clang/lib/CodeGen/CGCXXABI.cpp +++ b/clang/lib/CodeGen/CGCXXABI.cpp @@ -273,6 +273,20 @@ void CGCXXABI::ReadArrayCookie(CodeGenFunction &CGF, Address ptr, numElements = readArrayCookieImpl(CGF, allocAddr, cookieSize); } +void CGCXXABI::ReadArrayCookie(CodeGenFunction &CGF, Address ptr, + QualType eltTy, llvm::Value *&numElements, + llvm::Value *&allocPtr, CharUnits &cookieSize) { + assert(eltTy.isDestructedType()); + + // Derive a char* in the same address space as the pointer. + ptr = ptr.withElementType(CGF.Int8Ty); + + cookieSize = getArrayCookieSizeImpl(eltTy); + Address allocAddr = CGF.Builder.CreateConstInBoundsByteGEP(ptr, -cookieSize); + allocPtr = allocAddr.emitRawPointer(CGF); + numElements = readArrayCookieImpl(CGF, allocAddr, cookieSize); +} + llvm::Value *CGCXXABI::readArrayCookieImpl(CodeGenFunction &CGF, Address ptr, CharUnits cookieSize) { diff --git a/clang/lib/CodeGen/CGCXXABI.h b/clang/lib/CodeGen/CGCXXABI.h index 687ff7fb84444..148a7ba6df7e6 100644 --- a/clang/lib/CodeGen/CGCXXABI.h +++ b/clang/lib/CodeGen/CGCXXABI.h @@ -275,6 +275,7 @@ class CGCXXABI { virtual CatchTypeInfo getCatchAllTypeInfo(); virtual bool shouldTypeidBeNullChecked(QualType SrcRecordTy) = 0; + virtual bool hasVectorDeletingDtors() = 0; virtual void EmitBadTypeidCall(CodeGenFunction &CGF) = 0; virtual llvm::Value *EmitTypeid(CodeGenFunction &CGF, QualType SrcRecordTy, Address ThisPtr, @@ -575,6 +576,12 @@ class CGCXXABI { QualType ElementType, llvm::Value *&NumElements, llvm::Value *&AllocPtr, CharUnits &CookieSize); + /// Reads the array cookie associated with the given pointer, + /// that should have one. 
+ void ReadArrayCookie(CodeGenFunction &CGF, Address Ptr, QualType ElementType, + llvm::Value *&NumElements, llvm::Value *&AllocPtr, + CharUnits &CookieSize); + /// Return whether the given global decl needs a VTT parameter. virtual bool NeedsVTTParameter(GlobalDecl GD); diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 916455bc69393..bfcbc273dbda7 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -77,6 +77,22 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { // clang-format off case CC_RISCVVectorCall: return llvm::CallingConv::RISCV_VectorCall; // clang-format on +#define CC_VLS_CASE(ABI_VLEN) \ + case CC_RISCVVLSCall_##ABI_VLEN: \ + return llvm::CallingConv::RISCV_VLSCall_##ABI_VLEN; + CC_VLS_CASE(32) + CC_VLS_CASE(64) + CC_VLS_CASE(128) + CC_VLS_CASE(256) + CC_VLS_CASE(512) + CC_VLS_CASE(1024) + CC_VLS_CASE(2048) + CC_VLS_CASE(4096) + CC_VLS_CASE(8192) + CC_VLS_CASE(16384) + CC_VLS_CASE(32768) + CC_VLS_CASE(65536) +#undef CC_VLS_CASE } } @@ -266,6 +282,29 @@ static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D, if (D->hasAttr()) return CC_RISCVVectorCall; + if (RISCVVLSCCAttr *PCS = D->getAttr()) { + switch (PCS->getVectorWidth()) { + default: + llvm_unreachable("Invalid RISC-V VLS ABI VLEN"); +#define CC_VLS_CASE(ABI_VLEN) \ + case ABI_VLEN: \ + return CC_RISCVVLSCall_##ABI_VLEN; + CC_VLS_CASE(32) + CC_VLS_CASE(64) + CC_VLS_CASE(128) + CC_VLS_CASE(256) + CC_VLS_CASE(512) + CC_VLS_CASE(1024) + CC_VLS_CASE(2048) + CC_VLS_CASE(4096) + CC_VLS_CASE(8192) + CC_VLS_CASE(16384) + CC_VLS_CASE(32768) + CC_VLS_CASE(65536) +#undef CC_VLS_CASE + } + } + return CC_C; } @@ -3234,6 +3273,17 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, } } + // Struct of fixed-length vectors and struct of array of fixed-length + // vector in VLS calling convention are coerced to vector tuple + // type(represented as TargetExtType) and scalable vector type + // 
respectively, they're no longer handled as struct. + if (ArgI.isDirect() && isa(ConvertType(Ty)) && + (isa(ArgI.getCoerceToType()) || + isa(ArgI.getCoerceToType()))) { + ArgVals.push_back(ParamValue::forDirect(AI)); + break; + } + llvm::StructType *STy = dyn_cast(ArgI.getCoerceToType()); Address Alloca = CreateMemTemp(Ty, getContext().getDeclAlign(Arg), diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index 7a1096fcbca82..e54fd543f217b 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -1433,6 +1433,70 @@ static bool CanSkipVTablePointerInitialization(CodeGenFunction &CGF, return true; } +static void EmitConditionalArrayDtorCall(const CXXDestructorDecl *DD, + CodeGenFunction &CGF, + llvm::Value *ShouldDeleteCondition) { + Address ThisPtr = CGF.LoadCXXThisAddress(); + llvm::BasicBlock *ScalarBB = CGF.createBasicBlock("dtor.scalar"); + llvm::BasicBlock *callDeleteBB = + CGF.createBasicBlock("dtor.call_delete_after_array_destroy"); + llvm::BasicBlock *VectorBB = CGF.createBasicBlock("dtor.vector"); + auto *CondTy = cast(ShouldDeleteCondition->getType()); + llvm::Value *CheckTheBitForArrayDestroy = CGF.Builder.CreateAnd( + ShouldDeleteCondition, llvm::ConstantInt::get(CondTy, 2)); + llvm::Value *ShouldDestroyArray = + CGF.Builder.CreateIsNull(CheckTheBitForArrayDestroy); + CGF.Builder.CreateCondBr(ShouldDestroyArray, ScalarBB, VectorBB); + + CGF.EmitBlock(VectorBB); + + llvm::Value *numElements = nullptr; + llvm::Value *allocatedPtr = nullptr; + CharUnits cookieSize; + QualType EltTy = DD->getThisType()->getPointeeType(); + CGF.CGM.getCXXABI().ReadArrayCookie(CGF, ThisPtr, EltTy, numElements, + allocatedPtr, cookieSize); + + // Destroy the elements. 
+ QualType::DestructionKind dtorKind = EltTy.isDestructedType(); + + assert(dtorKind); + assert(numElements && "no element count for a type with a destructor!"); + + CharUnits elementSize = CGF.getContext().getTypeSizeInChars(EltTy); + CharUnits elementAlign = + ThisPtr.getAlignment().alignmentOfArrayElement(elementSize); + + llvm::Value *arrayBegin = ThisPtr.emitRawPointer(CGF); + llvm::Value *arrayEnd = CGF.Builder.CreateInBoundsGEP( + ThisPtr.getElementType(), arrayBegin, numElements, "delete.end"); + + // We already checked that the array is not 0-length before entering vector + // deleting dtor. + CGF.emitArrayDestroy(arrayBegin, arrayEnd, EltTy, elementAlign, + CGF.getDestroyer(dtorKind), + /*checkZeroLength*/ false, CGF.needsEHCleanup(dtorKind)); + + llvm::BasicBlock *VectorBBCont = CGF.createBasicBlock("dtor.vector.cont"); + CGF.EmitBlock(VectorBBCont); + + llvm::Value *CheckTheBitForDeleteCall = CGF.Builder.CreateAnd( + ShouldDeleteCondition, llvm::ConstantInt::get(CondTy, 1)); + + llvm::Value *ShouldCallDelete = + CGF.Builder.CreateIsNull(CheckTheBitForDeleteCall); + CGF.Builder.CreateCondBr(ShouldCallDelete, CGF.ReturnBlock.getBlock(), + callDeleteBB); + CGF.EmitBlock(callDeleteBB); + const CXXDestructorDecl *Dtor = cast(CGF.CurCodeDecl); + const CXXRecordDecl *ClassDecl = Dtor->getParent(); + CGF.EmitDeleteCall(Dtor->getOperatorDelete(), allocatedPtr, + CGF.getContext().getTagDeclType(ClassDecl)); + + CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); + CGF.EmitBlock(ScalarBB); +} + /// EmitDestructorBody - Emits the body of the current destructor. void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { const CXXDestructorDecl *Dtor = cast(CurGD.getDecl()); @@ -1462,7 +1526,9 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { // outside of the function-try-block, which means it's always // possible to delegate the destructor body to the complete // destructor. Do so. 
- if (DtorType == Dtor_Deleting) { + if (DtorType == Dtor_Deleting || DtorType == Dtor_VectorDeleting) { + if (CXXStructorImplicitParamValue && DtorType == Dtor_VectorDeleting) + EmitConditionalArrayDtorCall(Dtor, *this, CXXStructorImplicitParamValue); RunCleanupsScope DtorEpilogue(*this); EnterDtorCleanups(Dtor, Dtor_Deleting); if (HaveInsertPoint()) { @@ -1491,6 +1557,8 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { switch (DtorType) { case Dtor_Comdat: llvm_unreachable("not expecting a COMDAT"); case Dtor_Deleting: llvm_unreachable("already handled deleting case"); + case Dtor_VectorDeleting: + llvm_unreachable("already handled vector deleting case"); case Dtor_Complete: assert((Body || getTarget().getCXXABI().isMicrosoft()) && @@ -1573,7 +1641,6 @@ namespace { return CGF.EmitScalarExpr(ThisArg); return CGF.LoadCXXThis(); } - /// Call the operator delete associated with the current destructor. struct CallDtorDelete final : EHScopeStack::Cleanup { CallDtorDelete() {} @@ -1592,8 +1659,10 @@ namespace { bool ReturnAfterDelete) { llvm::BasicBlock *callDeleteBB = CGF.createBasicBlock("dtor.call_delete"); llvm::BasicBlock *continueBB = CGF.createBasicBlock("dtor.continue"); - llvm::Value *ShouldCallDelete - = CGF.Builder.CreateIsNull(ShouldDeleteCondition); + auto *CondTy = cast(ShouldDeleteCondition->getType()); + llvm::Value *CheckTheBit = CGF.Builder.CreateAnd( + ShouldDeleteCondition, llvm::ConstantInt::get(CondTy, 1)); + llvm::Value *ShouldCallDelete = CGF.Builder.CreateIsNull(CheckTheBit); CGF.Builder.CreateCondBr(ShouldCallDelete, continueBB, callDeleteBB); CGF.EmitBlock(callDeleteBB); @@ -2937,9 +3006,13 @@ llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad( CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); llvm::Value *TypeId = llvm::MetadataAsValue::get(CGM.getLLVMContext(), MD); + auto CheckedLoadIntrinsic = CGM.getVTables().useRelativeLayout() + ? 
llvm::Intrinsic::type_checked_load_relative + : llvm::Intrinsic::type_checked_load; llvm::Value *CheckedLoad = Builder.CreateCall( - CGM.getIntrinsic(llvm::Intrinsic::type_checked_load), + CGM.getIntrinsic(CheckedLoadIntrinsic), {VTable, llvm::ConstantInt::get(Int32Ty, VTableByteOffset), TypeId}); + llvm::Value *CheckResult = Builder.CreateExtractValue(CheckedLoad, 1); std::string TypeName = RD->getQualifiedNameAsString(); diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp index 9abf2e8c9190d..a9795c2c0dc8f 100644 --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -855,6 +855,20 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { // Create parameter copies. We do it before creating a promise, since an // evolution of coroutine TS may allow promise constructor to observe // parameter copies. + for (const ParmVarDecl *Parm : FnArgs) { + // If the original param is in an alloca, exclude it from the coroutine + // frame. The parameter copy will be part of the frame, but the original + // parameter memory should remain on the stack. This is necessary to + // ensure that parameters destroyed in callees, as with `trivial_abi` or + // in the MSVC C++ ABI, are appropriately destroyed after setting up the + // coroutine. + Address ParmAddr = GetAddrOfLocalVar(Parm); + if (auto *ParmAlloca = + dyn_cast(ParmAddr.getBasePointer())) { + ParmAlloca->setMetadata(llvm::LLVMContext::MD_coro_outside_frame, + llvm::MDNode::get(CGM.getLLVMContext(), {})); + } + } for (auto *PM : S.getParamMoves()) { EmitStmt(PM); ParamReplacer.addCopy(cast(PM)); @@ -942,9 +956,16 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { if (Stmt *Ret = S.getReturnStmt()) { // Since we already emitted the return value above, so we shouldn't // emit it again here. 
- if (GroManager.DirectEmit) + Expr *PreviousRetValue = nullptr; + if (GroManager.DirectEmit) { + PreviousRetValue = cast(Ret)->getRetValue(); cast(Ret)->setRetValue(nullptr); + } EmitStmt(Ret); + // Set the return value back. The code generator, as the AST **Consumer**, + // shouldn't change the AST. + if (PreviousRetValue) + cast(Ret)->setRetValue(PreviousRetValue); } // LLVM require the frontend to mark the coroutine. diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 46ad11e64c4d5..0e6daa42ee7bf 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1593,6 +1593,21 @@ static unsigned getDwarfCC(CallingConv CC) { return llvm::dwarf::DW_CC_LLVM_PreserveNone; case CC_RISCVVectorCall: return llvm::dwarf::DW_CC_LLVM_RISCVVectorCall; +#define CC_VLS_CASE(ABI_VLEN) case CC_RISCVVLSCall_##ABI_VLEN: + CC_VLS_CASE(32) + CC_VLS_CASE(64) + CC_VLS_CASE(128) + CC_VLS_CASE(256) + CC_VLS_CASE(512) + CC_VLS_CASE(1024) + CC_VLS_CASE(2048) + CC_VLS_CASE(4096) + CC_VLS_CASE(8192) + CC_VLS_CASE(16384) + CC_VLS_CASE(32768) + CC_VLS_CASE(65536) +#undef CC_VLS_CASE + return llvm::dwarf::DW_CC_LLVM_RISCVVLSCall; } return 0; } @@ -2104,7 +2119,8 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( // Emit MS ABI vftable information. There is only one entry for the // deleting dtor. const auto *DD = dyn_cast(Method); - GlobalDecl GD = DD ? GlobalDecl(DD, Dtor_Deleting) : GlobalDecl(Method); + GlobalDecl GD = + DD ? 
GlobalDecl(DD, Dtor_VectorDeleting) : GlobalDecl(Method); MethodVFTableLocation ML = CGM.getMicrosoftVTableContext().getMethodVFTableLocation(GD); VIndex = ML.Index; diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index 668282a6ab1a8..3ad9ebf624143 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -27,6 +27,7 @@ #include "clang/AST/CharUnits.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclOpenACC.h" #include "clang/AST/DeclOpenMP.h" #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/TargetInfo.h" @@ -177,6 +178,11 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::OMPDeclareMapper: return CGM.EmitOMPDeclareMapper(cast(&D), this); + case Decl::OpenACCDeclare: + return CGM.EmitOpenACCDeclare(cast(&D), this); + case Decl::OpenACCRoutine: + return CGM.EmitOpenACCRoutine(cast(&D), this); + case Decl::Typedef: // typedef int X; case Decl::TypeAlias: { // using X = int; [C++0x] QualType Ty = cast(D).getUnderlyingType(); @@ -2843,6 +2849,16 @@ void CodeGenModule::EmitOMPDeclareMapper(const OMPDeclareMapperDecl *D, getOpenMPRuntime().emitUserDefinedMapper(D, CGF); } +void CodeGenModule::EmitOpenACCDeclare(const OpenACCDeclareDecl *D, + CodeGenFunction *CGF) { + // This is a no-op, we can just ignore these declarations. +} + +void CodeGenModule::EmitOpenACCRoutine(const OpenACCRoutineDecl *D, + CodeGenFunction *CGF) { + // This is a no-op, we can just ignore these declarations. 
+} + void CodeGenModule::EmitOMPRequiresDecl(const OMPRequiresDecl *D) { getOpenMPRuntime().processRequiresDirective(D); } diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index f71c18a8041b1..d4e14f4574b87 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -1209,6 +1209,8 @@ void CodeGenFunction::EmitNewArrayInitializer( EmitCXXAggrConstructorCall(Ctor, NumElements, CurPtr, CCE, /*NewPointerIsChecked*/true, CCE->requiresZeroInitialization()); + if (CGM.getCXXABI().hasVectorDeletingDtors()) + CGM.requireVectorDestructorDefinition(Ctor->getParent()); return; } @@ -1912,10 +1914,8 @@ static void EmitDestroyingObjectDelete(CodeGenFunction &CGF, /// Emit the code for deleting a single object. /// \return \c true if we started emitting UnconditionalDeleteBlock, \c false /// if not. -static bool EmitObjectDelete(CodeGenFunction &CGF, - const CXXDeleteExpr *DE, - Address Ptr, - QualType ElementType, +static bool EmitObjectDelete(CodeGenFunction &CGF, const CXXDeleteExpr *DE, + Address Ptr, QualType ElementType, llvm::BasicBlock *UnconditionalDeleteBlock) { // C++11 [expr.delete]p3: // If the static type of the object to be deleted is different from its @@ -2131,6 +2131,40 @@ void CodeGenFunction::EmitCXXDeleteExpr(const CXXDeleteExpr *E) { assert(ConvertTypeForMem(DeleteTy) == Ptr.getElementType()); + if (E->isArrayForm() && CGM.getCXXABI().hasVectorDeletingDtors()) { + if (auto *RD = DeleteTy->getAsCXXRecordDecl()) { + auto *Dtor = RD->getDestructor(); + if (Dtor && Dtor->isVirtual()) { + llvm::Value *NumElements = nullptr; + llvm::Value *AllocatedPtr = nullptr; + CharUnits CookieSize; + llvm::BasicBlock *bodyBB = createBasicBlock("vdtor.call"); + llvm::BasicBlock *doneBB = createBasicBlock("vdtor.nocall"); + // Check array cookie to see if the array has 0 length. Don't call + // the destructor in that case. 
+ CGM.getCXXABI().ReadArrayCookie(*this, Ptr, E, DeleteTy, NumElements, + AllocatedPtr, CookieSize); + + auto *CondTy = cast(NumElements->getType()); + llvm::Value *isEmpty = Builder.CreateICmpEQ( + NumElements, llvm::ConstantInt::get(CondTy, 0)); + Builder.CreateCondBr(isEmpty, doneBB, bodyBB); + + // Delete cookie for empty array. + const FunctionDecl *operatorDelete = E->getOperatorDelete(); + EmitBlock(doneBB); + EmitDeleteCall(operatorDelete, AllocatedPtr, DeleteTy, NumElements, + CookieSize); + EmitBranch(DeleteEnd); + + EmitBlock(bodyBB); + if (!EmitObjectDelete(*this, E, Ptr, DeleteTy, DeleteEnd)) + EmitBlock(DeleteEnd); + return; + } + } + } + if (E->isArrayForm()) { EmitArrayDelete(*this, E, Ptr, DeleteTy); EmitBlock(DeleteEnd); diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index ee5874b26f534..08e42a9e1dcf3 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -1883,8 +1883,11 @@ llvm::Constant *ConstantEmitter::tryEmitPrivateForVarInit(const VarDecl &D) { // Try to emit the initializer. Note that this can allow some things that // are not allowed by tryEmitPrivateForMemory alone. 
- if (APValue *value = D.evaluateValue()) + if (APValue *value = D.evaluateValue()) { + assert(!value->allowConstexprUnknown() && + "Constexpr unknown values are not allowed in CodeGen"); return tryEmitPrivateForMemory(*value, destType); + } return nullptr; } diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index ed6d2036cb984..dc34653e8f497 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -85,16 +85,11 @@ llvm::Triple::ArchType CGHLSLRuntime::getArch() { return CGM.getTarget().getTriple().getArch(); } -// Returns true if the type is an HLSL resource class -static bool isResourceRecordType(const clang::Type *Ty) { - return HLSLAttributedResourceType::findHandleTypeOnResource(Ty) != nullptr; -} - // Returns true if the type is an HLSL resource class or an array of them static bool isResourceRecordTypeOrArrayOf(const clang::Type *Ty) { while (const ConstantArrayType *CAT = dyn_cast(Ty)) Ty = CAT->getArrayElementTypeNoTypeQual(); - return isResourceRecordType(Ty); + return Ty->isHLSLResourceRecord(); } // Emits constant global variables for buffer constants declarations @@ -658,7 +653,7 @@ void CGHLSLRuntime::handleGlobalVarDefinition(const VarDecl *VD, // on? return; - if (!isResourceRecordType(VD->getType().getTypePtr())) + if (!VD->getType().getTypePtr()->isHLSLResourceRecord()) // FIXME: Only simple declarations of resources are supported for now. // Arrays of resources or resources in user defined classes are // not implemented yet. 
diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp index d1876f47c0eea..495060ea0e6a2 100644 --- a/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/clang/lib/CodeGen/CGObjCGNU.cpp @@ -819,7 +819,7 @@ class CGObjCGNUstep : public CGObjCGNU { const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime; SlotStructTy = llvm::StructType::get(PtrTy, PtrTy, PtrTy, IntTy, IMPTy); - SlotTy = llvm::PointerType::getUnqual(SlotStructTy); + SlotTy = PtrTy; // Slot_t objc_msg_lookup_sender(id *receiver, SEL selector, id sender); SlotLookupFn.init(&CGM, "objc_msg_lookup_sender", SlotTy, PtrToIdTy, SelectorTy, IdTy); @@ -2284,10 +2284,12 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, BoolTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy); Int8Ty = llvm::Type::getInt8Ty(VMContext); + + PtrTy = llvm::PointerType::getUnqual(cgm.getLLVMContext()); + PtrToIntTy = PtrTy; // C string type. Used in lots of places. - PtrToInt8Ty = llvm::PointerType::getUnqual(Int8Ty); - ProtocolPtrTy = llvm::PointerType::getUnqual( - Types.ConvertType(CGM.getContext().getObjCProtoType())); + PtrToInt8Ty = PtrTy; + ProtocolPtrTy = PtrTy; Zeros[0] = llvm::ConstantInt::get(LongTy, 0); Zeros[1] = Zeros[0]; @@ -2302,9 +2304,6 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, SelectorElemTy = CGM.getTypes().ConvertTypeForMem(selTy->getPointeeType()); } - PtrToIntTy = llvm::PointerType::getUnqual(IntTy); - PtrTy = PtrToInt8Ty; - Int32Ty = llvm::Type::getInt32Ty(VMContext); Int64Ty = llvm::Type::getInt64Ty(VMContext); @@ -2323,7 +2322,7 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, IdTy = PtrToInt8Ty; IdElemTy = Int8Ty; } - PtrToIdTy = llvm::PointerType::getUnqual(IdTy); + PtrToIdTy = PtrTy; ProtocolTy = llvm::StructType::get(IdTy, PtrToInt8Ty, // name PtrToInt8Ty, // protocols @@ -2351,7 +2350,7 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, PtrToInt8Ty, PtrToInt8Ty }); ObjCSuperTy = 
llvm::StructType::get(IdTy, IdTy); - PtrToObjCSuperTy = llvm::PointerType::getUnqual(ObjCSuperTy); + PtrToObjCSuperTy = PtrTy; llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); @@ -2383,9 +2382,7 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, PtrDiffTy, BoolTy, BoolTy); // IMP type - llvm::Type *IMPArgs[] = { IdTy, SelectorTy }; - IMPTy = llvm::PointerType::getUnqual(llvm::FunctionType::get(IdTy, IMPArgs, - true)); + IMPTy = PtrTy; const LangOptions &Opts = CGM.getLangOpts(); if ((Opts.getGC() != LangOptions::NonGC) || Opts.ObjCAutoRefCount) @@ -2679,8 +2676,6 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, Class->getSuperClass()->getNameAsString(), /*isWeak*/false); if (IsClassMessage) { // Load the isa pointer of the superclass is this is a class method. - ReceiverClass = Builder.CreateBitCast(ReceiverClass, - llvm::PointerType::getUnqual(IdTy)); ReceiverClass = Builder.CreateAlignedLoad(IdTy, ReceiverClass, CGF.getPointerAlign()); } @@ -2721,8 +2716,6 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, } // Cast the pointer to a simplified version of the class structure llvm::Type *CastTy = llvm::StructType::get(IdTy, IdTy); - ReceiverClass = Builder.CreateBitCast(ReceiverClass, - llvm::PointerType::getUnqual(CastTy)); // Get the superclass pointer ReceiverClass = Builder.CreateStructGEP(CastTy, ReceiverClass, 1); // Load the superclass pointer @@ -3269,10 +3262,7 @@ CGObjCGNU::GenerateProtocolList(ArrayRef Protocols) { llvm::Value *CGObjCGNU::GenerateProtocolRef(CodeGenFunction &CGF, const ObjCProtocolDecl *PD) { - auto protocol = GenerateProtocolRef(PD); - llvm::Type *T = - CGM.getTypes().ConvertType(CGM.getContext().getObjCProtoType()); - return CGF.Builder.CreateBitCast(protocol, llvm::PointerType::getUnqual(T)); + return GenerateProtocolRef(PD); } llvm::Constant *CGObjCGNU::GenerateProtocolRef(const ObjCProtocolDecl *PD) { diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp index 
01552b6e53d00..639c38e7c4555 100644 --- a/clang/lib/CodeGen/CGObjCMac.cpp +++ b/clang/lib/CodeGen/CGObjCMac.cpp @@ -1545,7 +1545,8 @@ class CGObjCNonFragileABIMac : public CGObjCCommonMac { bool isClassLayoutKnownStatically(const ObjCInterfaceDecl *ID) { // Test a class by checking its superclasses up to // its base class if it has one. - for (; ID; ID = ID->getSuperClass()) { + assert(ID != nullptr && "Passed a null class to check layout"); + for (; ID != nullptr; ID = ID->getSuperClass()) { // The layout of base class NSObject // is guaranteed to be statically known if (ID->getIdentifier()->getName() == "NSObject") @@ -1556,7 +1557,9 @@ class CGObjCNonFragileABIMac : public CGObjCCommonMac { if (!ID->getImplementation()) return false; } - return false; + + // We know the layout of all the intermediate classes and superclasses. + return true; } public: diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index c2289985e9519..06a652c146fb9 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1475,16 +1475,6 @@ llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { return OMPBuilder.IdentPtr; } -llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { - if (!Kmpc_MicroTy) { - // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
- llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), - llvm::PointerType::getUnqual(CGM.Int32Ty)}; - Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); - } - return llvm::PointerType::getUnqual(Kmpc_MicroTy); -} - static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD) { std::optional DevTy = @@ -1857,11 +1847,10 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, this](CodeGenFunction &CGF, PrePostActionTy &) { // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); - CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); llvm::Value *Args[] = { RTLoc, CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars - CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; + OutlinedFn}; llvm::SmallVector RealArgs; RealArgs.append(std::begin(Args), std::end(Args)); RealArgs.append(CapturedVars.begin(), CapturedVars.end()); @@ -9937,7 +9926,7 @@ void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, llvm::Value *Args[] = { RTLoc, CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars - CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; + OutlinedFn}; llvm::SmallVector RealArgs; RealArgs.append(std::begin(Args), std::end(Args)); RealArgs.append(CapturedVars.begin(), CapturedVars.end()); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 3791bb7159235..4321712e1521d 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -386,10 +386,6 @@ class CGOpenMPRuntime { /// Map for SourceLocation and OpenMP runtime library debug locations. typedef llvm::DenseMap OpenMPDebugLocMapTy; OpenMPDebugLocMapTy OpenMPDebugLocMap; - /// The type for a microtask which gets passed to __kmpc_fork_call(). 
- /// Original representation is: - /// typedef void (kmpc_micro)(kmp_int32 global_tid, kmp_int32 bound_tid,...); - llvm::FunctionType *Kmpc_MicroTy = nullptr; /// Stores debug location and ThreadID for the function. struct DebugLocThreadIdTy { llvm::Value *DebugLoc; @@ -530,9 +526,6 @@ class CGOpenMPRuntime { /// Build type kmp_routine_entry_t (if not built yet). void emitKmpRoutineEntryT(QualType KmpInt32Ty); - /// Returns pointer to kmpc_micro type. - llvm::Type *getKmpc_MicroPointerTy(); - /// If the specified mangled name is not in the module, create and /// return threadprivate cache object. This object is a pointer's worth of /// storage that's reserved for use by the OpenMP runtime. diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index e56ba6c3e8803..abe799af32c6e 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -494,6 +494,10 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef Attrs) { break; case Stmt::OpenACCAtomicConstructClass: EmitOpenACCAtomicConstruct(cast(*S)); + break; + case Stmt::OpenACCCacheConstructClass: + EmitOpenACCCacheConstruct(cast(*S)); + break; } } diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp index c9108938bca50..66d75efa0a9a0 100644 --- a/clang/lib/CodeGen/CGVTables.cpp +++ b/clang/lib/CodeGen/CGVTables.cpp @@ -769,7 +769,8 @@ void CodeGenVTables::addVTableComponent(ConstantArrayBuilder &builder, case VTableComponent::CK_FunctionPointer: case VTableComponent::CK_CompleteDtorPointer: case VTableComponent::CK_DeletingDtorPointer: { - GlobalDecl GD = component.getGlobalDecl(); + GlobalDecl GD = + component.getGlobalDecl(CGM.getCXXABI().hasVectorDeletingDtors()); const bool IsThunk = nextVTableThunkIndex < layout.vtable_thunks().size() && diff --git a/clang/lib/CodeGen/CGVTables.h b/clang/lib/CodeGen/CGVTables.h index c06bf7a525d9f..5c45e355fb145 100644 --- a/clang/lib/CodeGen/CGVTables.h +++ b/clang/lib/CodeGen/CGVTables.h @@ -75,10 +75,6 @@ 
class CodeGenVTables { bool vtableHasLocalLinkage, bool isCompleteDtor) const; - bool useRelativeLayout() const; - - llvm::Type *getVTableComponentType() const; - public: /// Add vtable components for the given vtable layout to the given /// global initializer. @@ -151,6 +147,12 @@ class CodeGenVTables { /// Specify a global should not be instrumented with hwasan. void RemoveHwasanMetadata(llvm::GlobalValue *GV) const; + + /// Return the type used as components for a vtable. + llvm::Type *getVTableComponentType() const; + + /// Return true if the relative vtable layout is used. + bool useRelativeLayout() const; }; } // end namespace CodeGen diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index 7aa3639cabf39..4321efd49af36 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -1032,7 +1032,7 @@ CodeGenAction::loadModule(MemoryBufferRef MBRef) { // linker using merged object file. if (!Bm) { auto M = std::make_unique("empty", *VMContext); - M->setTargetTriple(CI.getTargetOpts().Triple); + M->setTargetTriple(Triple(CI.getTargetOpts().Triple)); return M; } Expected> MOrErr = @@ -1123,10 +1123,10 @@ void CodeGenAction::ExecuteAction() { return; const TargetOptions &TargetOpts = CI.getTargetOpts(); - if (TheModule->getTargetTriple() != TargetOpts.Triple) { + if (TheModule->getTargetTriple().str() != TargetOpts.Triple) { Diagnostics.Report(SourceLocation(), diag::warn_fe_override_module) << TargetOpts.Triple; - TheModule->setTargetTriple(TargetOpts.Triple); + TheModule->setTargetTriple(Triple(TargetOpts.Triple)); } EmbedObject(TheModule.get(), CodeGenOpts, Diagnostics); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 7c0d6c3685597..018fc66b72a1e 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4192,6 +4192,10 @@ class CodeGenFunction : public CodeGenTypeCache { // some sort of IR. 
EmitStmt(S.getAssociatedStmt()); } + void EmitOpenACCCacheConstruct(const OpenACCCacheConstruct &S) { + // TODO OpenACC: Implement this. It is currently implemented as a 'no-op', + // but in the future we will implement some sort of IR. + } //===--------------------------------------------------------------------===// // LValue Expression Emission diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 3caa79bb59096..bca0a932b3495 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -5595,7 +5595,11 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, if (D->getType()->isReferenceType()) T = D->getType(); - if (getLangOpts().CPlusPlus) { + if (getLangOpts().HLSL && + D->getType().getTypePtr()->isHLSLResourceRecord()) { + Init = llvm::PoisonValue::get(getTypes().ConvertType(ASTTy)); + NeedsGlobalCtor = true; + } else if (getLangOpts().CPlusPlus) { Init = EmitNullConstant(T); if (!IsDefinitionAvailableExternally) NeedsGlobalCtor = true; @@ -7927,3 +7931,49 @@ void CodeGenModule::moveLazyEmissionStates(CodeGenModule *NewBuilder) { NewBuilder->ABI->MangleCtx = std::move(ABI->MangleCtx); } + +bool CodeGenModule::classNeedsVectorDestructor(const CXXRecordDecl *RD) { + CXXDestructorDecl *Dtor = RD->getDestructor(); + // The compiler can't know if new[]/delete[] will be used outside of the DLL, + // so just force vector deleting destructor emission if dllexport is present. + // This matches MSVC behavior. 
+ if (Dtor && Dtor->isVirtual() && Dtor->isDefined() && + Dtor->hasAttr()) + return true; + + assert(getCXXABI().hasVectorDeletingDtors()); + return RequireVectorDeletingDtor.count(RD); +} + +void CodeGenModule::requireVectorDestructorDefinition(const CXXRecordDecl *RD) { + assert(getCXXABI().hasVectorDeletingDtors()); + RequireVectorDeletingDtor.insert(RD); + + // To reduce code size in general case we lazily emit scalar deleting + // destructor definition and an alias from vector deleting destructor to + // scalar deleting destructor. It may happen that we first emitted the scalar + // deleting destructor definition and the alias and then discovered that the + // definition of the vector deleting destructor is required. Then we need to + // remove the alias and the scalar deleting destructor and queue vector + // deleting destructor body for emission. Check if that is the case. + CXXDestructorDecl *DtorD = RD->getDestructor(); + GlobalDecl ScalarDtorGD(DtorD, Dtor_Deleting); + StringRef MangledName = getMangledName(ScalarDtorGD); + llvm::GlobalValue *Entry = GetGlobalValue(MangledName); + if (Entry && !Entry->isDeclaration()) { + GlobalDecl VectorDtorGD(DtorD, Dtor_VectorDeleting); + StringRef VDName = getMangledName(VectorDtorGD); + llvm::GlobalValue *VDEntry = GetGlobalValue(VDName); + // It exists and it should be an alias. 
+ assert(VDEntry && isa(VDEntry)); + auto *NewFn = llvm::Function::Create( + cast(VDEntry->getValueType()), + llvm::Function::ExternalLinkage, VDName, &getModule()); + NewFn->takeName(VDEntry); + VDEntry->replaceAllUsesWith(NewFn); + VDEntry->eraseFromParent(); + Entry->replaceAllUsesWith(NewFn); + Entry->eraseFromParent(); + addDeferredDeclToEmit(VectorDtorGD); + } +} diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 4a269f622ece4..83bb5bc54d077 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -528,6 +528,9 @@ class CodeGenModule : public CodeGenTypeCache { /// that we don't re-emit the initializer. llvm::DenseMap DelayedCXXInitPosition; + /// To remember which types did require a vector deleting dtor. + llvm::SmallPtrSet RequireVectorDeletingDtor; + typedef std::pair GlobalInitData; @@ -1544,6 +1547,7 @@ class CodeGenModule : public CodeGenTypeCache { void EmitGlobal(GlobalDecl D); bool TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D); + void EmitDefinitionAsAlias(GlobalDecl Alias, GlobalDecl Target); llvm::GlobalValue *GetGlobalValue(StringRef Ref); @@ -1569,6 +1573,11 @@ class CodeGenModule : public CodeGenTypeCache { void EmitOMPDeclareMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF = nullptr); + // Emit code for the OpenACC Declare declaration. + void EmitOpenACCDeclare(const OpenACCDeclareDecl *D, CodeGenFunction *CGF); + // Emit code for the OpenACC Routine declaration. + void EmitOpenACCRoutine(const OpenACCRoutineDecl *D, CodeGenFunction *CGF); + /// Emit a code for requires directive. /// \param D Requires declaration void EmitOMPRequiresDecl(const OMPRequiresDecl *D); @@ -1804,6 +1813,8 @@ class CodeGenModule : public CodeGenTypeCache { // behavior. So projects like the Linux kernel can rely on it. 
return !getLangOpts().CPlusPlus; } + void requireVectorDestructorDefinition(const CXXRecordDecl *RD); + bool classNeedsVectorDestructor(const CXXRecordDecl *RD); private: bool shouldDropDLLAttribute(const Decl *D, const llvm::GlobalValue *GV) const; diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 405242e97e75c..bd625052cb5ed 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -107,6 +107,9 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) { MT->getNumRows() * MT->getNumColumns()); } + if (T->isMFloat8Type()) + return llvm::Type::getInt8Ty(getLLVMContext()); + llvm::Type *R = ConvertType(T); // Check for the boolean vector case. diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index a84412bd5c045..b145da0f0ec09 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -90,6 +90,8 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI { case Dtor_Comdat: llvm_unreachable("emitting dtor comdat as function?"); + case Dtor_VectorDeleting: + llvm_unreachable("unexpected dtor kind for this ABI"); } llvm_unreachable("bad dtor kind"); } @@ -179,6 +181,7 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI { } bool shouldTypeidBeNullChecked(QualType SrcRecordTy) override; + bool hasVectorDeletingDtors() override { return false; } void EmitBadTypeidCall(CodeGenFunction &CGF) override; llvm::Value *EmitTypeid(CodeGenFunction &CGF, QualType SrcRecordTy, Address ThisPtr, @@ -448,7 +451,8 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI { if (!IsInlined) continue; - StringRef Name = CGM.getMangledName(VtableComponent.getGlobalDecl()); + StringRef Name = CGM.getMangledName( + VtableComponent.getGlobalDecl(/*HasVectorDeletingDtors=*/false)); auto *Entry = CGM.GetGlobalValue(Name); // This checks if virtual inline function has already been emitted. 
// Note that it is possible that this inline function would be emitted @@ -953,7 +957,7 @@ ItaniumCXXABI::EmitMemberPointerConversion(CodeGenFunction &CGF, Builder.CreateCondBr(IsVirtualOffset, MergeBB, ResignBB); CGF.EmitBlock(ResignBB); - llvm::Type *PtrTy = llvm::PointerType::getUnqual(CGM.Int8Ty); + llvm::Type *PtrTy = llvm::PointerType::getUnqual(CGM.getLLVMContext()); MemFnPtr = Builder.CreateIntToPtr(MemFnPtr, PtrTy); MemFnPtr = CGF.emitPointerAuthResign(MemFnPtr, SrcType, CurAuthInfo, NewAuthInfo, @@ -2189,12 +2193,14 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, uint64_t VTableIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(GD); llvm::Value *VFunc, *VTableSlotPtr = nullptr; auto &Schema = CGM.getCodeGenOpts().PointerAuth.CXXVirtualFunctionPointers; + + llvm::Type *ComponentTy = CGM.getVTables().getVTableComponentType(); + uint64_t ByteOffset = + VTableIndex * CGM.getDataLayout().getTypeSizeInBits(ComponentTy) / 8; + if (!Schema && CGF.ShouldEmitVTableTypeCheckedLoad(MethodDecl->getParent())) { - VFunc = CGF.EmitVTableTypeCheckedLoad( - MethodDecl->getParent(), VTable, PtrTy, - VTableIndex * - CGM.getContext().getTargetInfo().getPointerWidth(LangAS::Default) / - 8); + VFunc = CGF.EmitVTableTypeCheckedLoad(MethodDecl->getParent(), VTable, + PtrTy, ByteOffset); } else { CGF.EmitTypeMetadataCodeForVCall(MethodDecl->getParent(), VTable, Loc); @@ -2202,7 +2208,7 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, if (CGM.getItaniumVTableContext().isRelativeLayout()) { VFuncLoad = CGF.Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::load_relative, {CGM.Int32Ty}), - {VTable, llvm::ConstantInt::get(CGM.Int32Ty, 4 * VTableIndex)}); + {VTable, llvm::ConstantInt::get(CGM.Int32Ty, ByteOffset)}); } else { VTableSlotPtr = CGF.Builder.CreateConstInBoundsGEP1_64( PtrTy, VTable, VTableIndex, "vfn"); diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index 
5cb742a92a9bd..4b55fc3f17bd7 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -70,8 +70,8 @@ class MicrosoftCXXABI : public CGCXXABI { switch (GD.getDtorType()) { case Dtor_Complete: case Dtor_Deleting: + case Dtor_VectorDeleting: return true; - case Dtor_Base: return false; @@ -145,6 +145,7 @@ class MicrosoftCXXABI : public CGCXXABI { } bool shouldTypeidBeNullChecked(QualType SrcRecordTy) override; + bool hasVectorDeletingDtors() override { return true; } void EmitBadTypeidCall(CodeGenFunction &CGF) override; llvm::Value *EmitTypeid(CodeGenFunction &CGF, QualType SrcRecordTy, Address ThisPtr, @@ -260,7 +261,7 @@ class MicrosoftCXXABI : public CGCXXABI { // There's only Dtor_Deleting in vftable but it shares the this // adjustment with the base one, so look up the deleting one instead. - LookupGD = GlobalDecl(DD, Dtor_Deleting); + LookupGD = GlobalDecl(DD, Dtor_VectorDeleting); } MethodVFTableLocation ML = CGM.getMicrosoftVTableContext().getMethodVFTableLocation(LookupGD); @@ -342,8 +343,8 @@ class MicrosoftCXXABI : public CGCXXABI { void adjustCallArgsForDestructorThunk(CodeGenFunction &CGF, GlobalDecl GD, CallArgList &CallArgs) override { - assert(GD.getDtorType() == Dtor_Deleting && - "Only deleting destructor thunks are available in this ABI"); + assert(GD.getDtorType() == Dtor_VectorDeleting && + "Only vector deleting destructor thunks are available in this ABI"); CallArgs.add(RValue::get(getStructorImplicitParamValue(CGF)), getContext().IntTy); } @@ -1090,7 +1091,8 @@ bool MicrosoftCXXABI::HasThisReturn(GlobalDecl GD) const { static bool isDeletingDtor(GlobalDecl GD) { return isa(GD.getDecl()) && - GD.getDtorType() == Dtor_Deleting; + (GD.getDtorType() == Dtor_Deleting || + GD.getDtorType() == Dtor_VectorDeleting); } bool MicrosoftCXXABI::hasMostDerivedReturn(GlobalDecl GD) const { @@ -1343,7 +1345,8 @@ MicrosoftCXXABI::buildStructorSignature(GlobalDecl GD, AddedStructorArgCounts Added; // TODO: 'for base' flag 
if (isa(GD.getDecl()) && - GD.getDtorType() == Dtor_Deleting) { + (GD.getDtorType() == Dtor_Deleting || + GD.getDtorType() == Dtor_VectorDeleting)) { // The scalar deleting destructor takes an implicit int parameter. ArgTys.push_back(getContext().IntTy); ++Added.Suffix; @@ -1375,7 +1378,7 @@ void MicrosoftCXXABI::setCXXDestructorDLLStorage(llvm::GlobalValue *GV, CXXDtorType DT) const { // Deleting destructor variants are never imported or exported. Give them the // default storage class. - if (DT == Dtor_Deleting) { + if (DT == Dtor_Deleting || DT == Dtor_VectorDeleting) { GV->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); } else { const NamedDecl *ND = Dtor; @@ -1409,6 +1412,12 @@ llvm::GlobalValue::LinkageTypes MicrosoftCXXABI::getCXXDestructorLinkage( // and are emitted everywhere they are used. They are internal if the class // is internal. return llvm::GlobalValue::LinkOnceODRLinkage; + case Dtor_VectorDeleting: + // Use the weak, non-ODR linkage for vector deleting destructors to block + // inlining. This enables an MS ABI code-size saving optimization that + // allows us to avoid emitting array deletion code when arrays of a given + // type are not allocated within the final linkage unit. + return llvm::GlobalValue::WeakAnyLinkage; case Dtor_Comdat: llvm_unreachable("MS C++ ABI does not support comdat dtors"); } @@ -1440,7 +1449,7 @@ MicrosoftCXXABI::getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD) { // There's no Dtor_Base in vftable but it shares the this adjustment with // the deleting one, so look it up instead. - GD = GlobalDecl(DD, Dtor_Deleting); + GD = GlobalDecl(DD, Dtor_VectorDeleting); } MethodVFTableLocation ML = @@ -1489,7 +1498,7 @@ Address MicrosoftCXXABI::adjustThisArgumentForVirtualFunctionCall( // There's only Dtor_Deleting in vftable but it shares the this adjustment // with the base one, so look up the deleting one instead. 
- LookupGD = GlobalDecl(DD, Dtor_Deleting); + LookupGD = GlobalDecl(DD, Dtor_VectorDeleting); } MethodVFTableLocation ML = CGM.getMicrosoftVTableContext().getMethodVFTableLocation(LookupGD); @@ -2002,20 +2011,20 @@ llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall( auto *D = dyn_cast(E); assert((CE != nullptr) ^ (D != nullptr)); assert(CE == nullptr || CE->arg_begin() == CE->arg_end()); - assert(DtorType == Dtor_Deleting || DtorType == Dtor_Complete); + assert(DtorType == Dtor_VectorDeleting || DtorType == Dtor_Complete || + DtorType == Dtor_Deleting); // We have only one destructor in the vftable but can get both behaviors // by passing an implicit int parameter. - GlobalDecl GD(Dtor, Dtor_Deleting); + GlobalDecl GD(Dtor, Dtor_VectorDeleting); const CGFunctionInfo *FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration(GD); llvm::FunctionType *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo); CGCallee Callee = CGCallee::forVirtual(CE, GD, This, Ty); ASTContext &Context = getContext(); - llvm::Value *ImplicitParam = llvm::ConstantInt::get( - llvm::IntegerType::getInt32Ty(CGF.getLLVMContext()), - DtorType == Dtor_Deleting); + uint32_t Flags = ((D && D->isArrayForm()) << 1) | (DtorType == Dtor_Deleting); + llvm::Value *ImplicitParam = CGF.Builder.getInt32(Flags); QualType ThisTy; if (CE) { @@ -4055,6 +4064,18 @@ void MicrosoftCXXABI::emitCXXStructor(GlobalDecl GD) { if (GD.getDtorType() == Dtor_Base && !CGM.TryEmitBaseDestructorAsAlias(dtor)) return; + if (GD.getDtorType() == Dtor_VectorDeleting && + !CGM.classNeedsVectorDestructor(dtor->getParent())) { + // Create GlobalDecl object with the correct type for the scalar + // deleting destructor. + GlobalDecl ScalarDtorGD(dtor, Dtor_Deleting); + + // Emit an alias from the vector deleting destructor to the scalar deleting + // destructor. 
+ CGM.EmitDefinitionAsAlias(GD, ScalarDtorGD); + return; + } + llvm::Function *Fn = CGM.codegenCXXStructor(GD); if (Fn->isWeakForLinker()) Fn->setComdat(CGM.getModule().getOrInsertComdat(Fn->getName())); diff --git a/clang/lib/CodeGen/ModuleBuilder.cpp b/clang/lib/CodeGen/ModuleBuilder.cpp index d4e0ab0339a8b..09a7d79ae4afb 100644 --- a/clang/lib/CodeGen/ModuleBuilder.cpp +++ b/clang/lib/CodeGen/ModuleBuilder.cpp @@ -151,7 +151,7 @@ namespace { void Initialize(ASTContext &Context) override { Ctx = &Context; - M->setTargetTriple(Ctx->getTargetInfo().getTriple().getTriple()); + M->setTargetTriple(Ctx->getTargetInfo().getTriple()); M->setDataLayout(Ctx->getTargetInfo().getDataLayoutString()); const auto &SDKVersion = Ctx->getTargetInfo().getSDKVersion(); if (!SDKVersion.empty()) diff --git a/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp b/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp index 788c8b932ab52..95971e57086e7 100644 --- a/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp +++ b/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp @@ -255,7 +255,7 @@ class PCHContainerGenerator : public ASTConsumer { if (Diags.hasErrorOccurred()) return; - M->setTargetTriple(Ctx.getTargetInfo().getTriple().getTriple()); + M->setTargetTriple(Ctx.getTargetInfo().getTriple()); M->setDataLayout(Ctx.getTargetInfo().getDataLayoutString()); // PCH files don't have a signature field in the control block, @@ -274,7 +274,7 @@ class PCHContainerGenerator : public ASTConsumer { // Ensure the target exists. std::string Error; auto Triple = Ctx.getTargetInfo().getTriple(); - if (!llvm::TargetRegistry::lookupTarget(Triple.getTriple(), Error)) + if (!llvm::TargetRegistry::lookupTarget(Triple, Error)) llvm::report_fatal_error(llvm::Twine(Error)); // Emit the serialized Clang AST into its own section. 
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index d6e0e720a0941..073ca3cc82690 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -763,7 +763,7 @@ bool AArch64ABIInfo::passAsPureScalableType( return false; bool isPredicate; - switch (Ty->getAs()->getKind()) { + switch (Ty->castAs()->getKind()) { #define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \ case BuiltinType::Id: \ isPredicate = false; \ diff --git a/clang/lib/CodeGen/Targets/RISCV.cpp b/clang/lib/CodeGen/Targets/RISCV.cpp index aa5fb6329c1c1..5aa10ba41f5ed 100644 --- a/clang/lib/CodeGen/Targets/RISCV.cpp +++ b/clang/lib/CodeGen/Targets/RISCV.cpp @@ -8,6 +8,7 @@ #include "ABIInfoImpl.h" #include "TargetInfo.h" +#include "llvm/TargetParser/RISCVTargetParser.h" using namespace clang; using namespace clang::CodeGen; @@ -34,6 +35,9 @@ class RISCVABIInfo : public DefaultABIInfo { llvm::Type *&Field2Ty, CharUnits &Field2Off) const; + bool detectVLSCCEligibleStruct(QualType Ty, unsigned ABIVLen, + llvm::Type *&VLSType) const; + public: RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen, bool EABI) @@ -45,8 +49,8 @@ class RISCVABIInfo : public DefaultABIInfo { void computeInfo(CGFunctionInfo &FI) const override; ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &ArgGPRsLeft, - int &ArgFPRsLeft) const; - ABIArgInfo classifyReturnType(QualType RetTy) const; + int &ArgFPRsLeft, unsigned ABIVLen) const; + ABIArgInfo classifyReturnType(QualType RetTy, unsigned ABIVLen) const; RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty, AggValueSlot Slot) const override; @@ -62,7 +66,7 @@ class RISCVABIInfo : public DefaultABIInfo { llvm::Type *Field2Ty, CharUnits Field2Off) const; - ABIArgInfo coerceVLSVector(QualType Ty) const; + ABIArgInfo coerceVLSVector(QualType Ty, unsigned ABIVLen = 0) const; using ABIInfo::appendAttributeMangling; void 
appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index, @@ -111,9 +115,32 @@ void RISCVABIInfo::appendAttributeMangling(StringRef AttrStr, } void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const { + unsigned ABIVLen; + switch (FI.getExtInfo().getCC()) { + default: + ABIVLen = 0; + break; +#define CC_VLS_CASE(ABI_VLEN) \ + case CallingConv::CC_RISCVVLSCall_##ABI_VLEN: \ + ABIVLen = ABI_VLEN; \ + break; + CC_VLS_CASE(32) + CC_VLS_CASE(64) + CC_VLS_CASE(128) + CC_VLS_CASE(256) + CC_VLS_CASE(512) + CC_VLS_CASE(1024) + CC_VLS_CASE(2048) + CC_VLS_CASE(4096) + CC_VLS_CASE(8192) + CC_VLS_CASE(16384) + CC_VLS_CASE(32768) + CC_VLS_CASE(65536) +#undef CC_VLS_CASE + } QualType RetTy = FI.getReturnType(); if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(RetTy); + FI.getReturnInfo() = classifyReturnType(RetTy, ABIVLen); // IsRetIndirect is true if classifyArgumentType indicated the value should // be passed indirect, or if the type size is a scalar greater than 2*XLen @@ -139,8 +166,8 @@ void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const { int ArgNum = 0; for (auto &ArgInfo : FI.arguments()) { bool IsFixed = ArgNum < NumFixedArgs; - ArgInfo.info = - classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft, ArgFPRsLeft); + ArgInfo.info = classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft, + ArgFPRsLeft, ABIVLen); ArgNum++; } } @@ -359,9 +386,158 @@ ABIArgInfo RISCVABIInfo::coerceAndExpandFPCCEligibleStruct( return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType); } +bool RISCVABIInfo::detectVLSCCEligibleStruct(QualType Ty, unsigned ABIVLen, + llvm::Type *&VLSType) const { + // No riscv_vls_cc attribute. + if (ABIVLen == 0) + return false; + + // Legal struct for VLS calling convention should fulfill following rules: + // 1. Struct element should be either "homogeneous fixed-length vectors" or "a + // fixed-length vector array". + // 2. 
Number of struct elements or array elements should be greater or equal + // to 1 and less or equal to 8 + // 3. Total number of vector registers needed should not exceed 8. + // + // Examples: Assume ABI_VLEN = 128. + // These are legal structs: + // a. Structs with 1~8 "same" fixed-length vectors, e.g. + // struct { + // __attribute__((vector_size(16))) int a; + // __attribute__((vector_size(16))) int b; + // } + // + // b. Structs with "single" fixed-length vector array with lengh 1~8, e.g. + // struct { + // __attribute__((vector_size(16))) int a[3]; + // } + // These are illegal structs: + // a. Structs with 9 fixed-length vectors, e.g. + // struct { + // __attribute__((vector_size(16))) int a; + // __attribute__((vector_size(16))) int b; + // __attribute__((vector_size(16))) int c; + // __attribute__((vector_size(16))) int d; + // __attribute__((vector_size(16))) int e; + // __attribute__((vector_size(16))) int f; + // __attribute__((vector_size(16))) int g; + // __attribute__((vector_size(16))) int h; + // __attribute__((vector_size(16))) int i; + // } + // + // b. Structs with "multiple" fixed-length vector array, e.g. + // struct { + // __attribute__((vector_size(16))) int a[2]; + // __attribute__((vector_size(16))) int b[2]; + // } + // + // c. Vector registers needed exceeds 8, e.g. + // struct { + // // Registers needed for single fixed-length element: + // // 64 * 8 / ABI_VLEN = 4 + // __attribute__((vector_size(64))) int a; + // __attribute__((vector_size(64))) int b; + // __attribute__((vector_size(64))) int c; + // __attribute__((vector_size(64))) int d; + // } + // + // Struct of 1 fixed-length vector is passed as a scalable vector. + // Struct of >1 fixed-length vectors are passed as vector tuple. + // Struct of 1 array of fixed-length vectors is passed as a scalable vector. + // Otherwise, pass the struct indirectly. 
+ + if (llvm::StructType *STy = dyn_cast(CGT.ConvertType(Ty))) { + unsigned NumElts = STy->getStructNumElements(); + if (NumElts > 8) + return false; + + auto *FirstEltTy = STy->getElementType(0); + if (!STy->containsHomogeneousTypes()) + return false; + + // Check structure of fixed-length vectors and turn them into vector tuple + // type if legal. + if (auto *FixedVecTy = dyn_cast(FirstEltTy)) { + if (NumElts == 1) { + // Handle single fixed-length vector. + VLSType = llvm::ScalableVectorType::get( + FixedVecTy->getElementType(), + llvm::divideCeil(FixedVecTy->getNumElements() * + llvm::RISCV::RVVBitsPerBlock, + ABIVLen)); + // Check registers needed <= 8. + return llvm::divideCeil( + FixedVecTy->getNumElements() * + FixedVecTy->getElementType()->getScalarSizeInBits(), + ABIVLen) <= 8; + } + // LMUL + // = fixed-length vector size / ABIVLen + // = 8 * I8EltCount / RVVBitsPerBlock + // => + // I8EltCount + // = (fixed-length vector size * RVVBitsPerBlock) / (ABIVLen * 8) + unsigned I8EltCount = llvm::divideCeil( + FixedVecTy->getNumElements() * + FixedVecTy->getElementType()->getScalarSizeInBits() * + llvm::RISCV::RVVBitsPerBlock, + ABIVLen * 8); + VLSType = llvm::TargetExtType::get( + getVMContext(), "riscv.vector.tuple", + llvm::ScalableVectorType::get(llvm::Type::getInt8Ty(getVMContext()), + I8EltCount), + NumElts); + // Check registers needed <= 8. + return NumElts * + llvm::divideCeil( + FixedVecTy->getNumElements() * + FixedVecTy->getElementType()->getScalarSizeInBits(), + ABIVLen) <= + 8; + } + + // If elements are not fixed-length vectors, it should be an array. + if (NumElts != 1) + return false; + + // Check array of fixed-length vector and turn it into scalable vector type + // if legal. 
+ if (auto *ArrTy = dyn_cast(FirstEltTy)) { + unsigned NumArrElt = ArrTy->getNumElements(); + if (NumArrElt > 8) + return false; + + auto *ArrEltTy = dyn_cast(ArrTy->getElementType()); + if (!ArrEltTy) + return false; + + // LMUL + // = NumArrElt * fixed-length vector size / ABIVLen + // = fixed-length vector elt size * ScalVecNumElts / RVVBitsPerBlock + // => + // ScalVecNumElts + // = (NumArrElt * fixed-length vector size * RVVBitsPerBlock) / + // (ABIVLen * fixed-length vector elt size) + // = NumArrElt * num fixed-length vector elt * RVVBitsPerBlock / + // ABIVLen + unsigned ScalVecNumElts = llvm::divideCeil( + NumArrElt * ArrEltTy->getNumElements() * llvm::RISCV::RVVBitsPerBlock, + ABIVLen); + VLSType = llvm::ScalableVectorType::get(ArrEltTy->getElementType(), + ScalVecNumElts); + // Check registers needed <= 8. + return llvm::divideCeil( + ScalVecNumElts * + ArrEltTy->getElementType()->getScalarSizeInBits(), + llvm::RISCV::RVVBitsPerBlock) <= 8; + } + } + return false; +} + // Fixed-length RVV vectors are represented as scalable vectors in function // args/return and must be coerced from fixed vectors. 
-ABIArgInfo RISCVABIInfo::coerceVLSVector(QualType Ty) const { +ABIArgInfo RISCVABIInfo::coerceVLSVector(QualType Ty, unsigned ABIVLen) const { assert(Ty->isVectorType() && "expected vector type!"); const auto *VT = Ty->castAs(); @@ -385,23 +561,58 @@ ABIArgInfo RISCVABIInfo::coerceVLSVector(QualType Ty) const { NumElts *= 8; break; default: - assert(VT->getVectorKind() == VectorKind::RVVFixedLengthData && + assert((VT->getVectorKind() == VectorKind::Generic || + VT->getVectorKind() == VectorKind::RVVFixedLengthData) && "Unexpected vector kind"); EltType = CGT.ConvertType(VT->getElementType()); } - // The MinNumElts is simplified from equation: - // NumElts / VScale = - // (EltSize * NumElts / (VScale * RVVBitsPerBlock)) - // * (RVVBitsPerBlock / EltSize) - llvm::ScalableVectorType *ResType = - llvm::ScalableVectorType::get(EltType, NumElts / VScale->first); + llvm::ScalableVectorType *ResType; + + if (ABIVLen == 0) { + // The MinNumElts is simplified from equation: + // NumElts / VScale = + // (EltSize * NumElts / (VScale * RVVBitsPerBlock)) + // * (RVVBitsPerBlock / EltSize) + ResType = llvm::ScalableVectorType::get(EltType, NumElts / VScale->first); + } else { + // Check registers needed <= 8. + if ((EltType->getScalarSizeInBits() * NumElts / ABIVLen) > 8) + return getNaturalAlignIndirect( + Ty, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(), + /*ByVal=*/false); + + // Generic vector + // The number of elements needs to be at least 1. + ResType = llvm::ScalableVectorType::get( + EltType, + llvm::divideCeil(NumElts * llvm::RISCV::RVVBitsPerBlock, ABIVLen)); + + // If the corresponding extension is not supported, just make it an i8 + // vector with same LMUL. 
+ const TargetInfo &TI = getContext().getTargetInfo(); + if ((EltType->isHalfTy() && !TI.hasFeature("zvfhmin")) || + (EltType->isBFloatTy() && !TI.hasFeature("zvfbfmin")) || + (EltType->isFloatTy() && !TI.hasFeature("zve32f")) || + (EltType->isDoubleTy() && !TI.hasFeature("zve64d")) || + (EltType->isIntegerTy(64) && !TI.hasFeature("zve64x")) || + EltType->isIntegerTy(128)) { + // The number of elements needs to be at least 1. + ResType = llvm::ScalableVectorType::get( + llvm::Type::getInt8Ty(getVMContext()), + llvm::divideCeil(EltType->getScalarSizeInBits() * NumElts * + llvm::RISCV::RVVBitsPerBlock, + 8 * ABIVLen)); + } + } + return ABIArgInfo::getDirect(ResType); } ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, int &ArgGPRsLeft, - int &ArgFPRsLeft) const { + int &ArgFPRsLeft, + unsigned ABIVLen) const { assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow"); Ty = useFirstFieldIfTransparentUnion(Ty); @@ -458,6 +669,12 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, } } + if (IsFixed && Ty->isStructureOrClassType()) { + llvm::Type *VLSType = nullptr; + if (detectVLSCCEligibleStruct(Ty, ABIVLen, VLSType)) + return ABIArgInfo::getDirect(VLSType); + } + uint64_t NeededAlign = getContext().getTypeAlign(Ty); // Determine the number of GPRs needed to pass the current argument // according to the ABI. 
2*XLen-aligned varargs are passed in "aligned" @@ -501,13 +718,22 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, return ABIArgInfo::getDirect(); } - if (const VectorType *VT = Ty->getAs()) + // TODO: _BitInt is not handled yet in VLS calling convention since _BitInt + // ABI is also not merged yet in RISC-V: + // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/419 + if (const VectorType *VT = Ty->getAs(); + VT && !VT->getElementType()->isBitIntType()) { if (VT->getVectorKind() == VectorKind::RVVFixedLengthData || VT->getVectorKind() == VectorKind::RVVFixedLengthMask || VT->getVectorKind() == VectorKind::RVVFixedLengthMask_1 || VT->getVectorKind() == VectorKind::RVVFixedLengthMask_2 || VT->getVectorKind() == VectorKind::RVVFixedLengthMask_4) return coerceVLSVector(Ty); + if (VT->getVectorKind() == VectorKind::Generic && ABIVLen != 0) + // Generic vector without riscv_vls_cc should fall through and pass by + // reference. + return coerceVLSVector(Ty, ABIVLen); + } // Aggregates which are <= 2*XLen will be passed in registers if possible, // so coerce to integers. @@ -532,7 +758,8 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, /*ByVal=*/false); } -ABIArgInfo RISCVABIInfo::classifyReturnType(QualType RetTy) const { +ABIArgInfo RISCVABIInfo::classifyReturnType(QualType RetTy, + unsigned ABIVLen) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); @@ -541,8 +768,8 @@ ABIArgInfo RISCVABIInfo::classifyReturnType(QualType RetTy) const { // The rules for return and argument types are the same, so defer to // classifyArgumentType. 
- return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft, - ArgFPRsLeft); + return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft, ArgFPRsLeft, + ABIVLen); } RValue RISCVABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, @@ -606,6 +833,12 @@ class RISCVTargetCodeGenInfo : public TargetCodeGenInfo { switch (Attr->getInterrupt()) { case RISCVInterruptAttr::supervisor: Kind = "supervisor"; break; case RISCVInterruptAttr::machine: Kind = "machine"; break; + case RISCVInterruptAttr::qcinest: + Kind = "qci-nest"; + break; + case RISCVInterruptAttr::qcinonest: + Kind = "qci-nonest"; + break; } Fn->addFnAttr("interrupt", Kind); diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp index b7a1374d5b399..b36a6e1396653 100644 --- a/clang/lib/CodeGen/Targets/X86.cpp +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -3390,6 +3390,9 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, case BuiltinType::Int128: case BuiltinType::UInt128: + case BuiltinType::Float128: + // 128-bit float and integer types share the same ABI. + // If it's a parameter type, the normal ABI rule is that arguments larger // than 8 bytes are passed indirectly. GCC follows it. We follow it too, // even though it isn't particularly efficient. @@ -3400,6 +3403,8 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, // Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that. // Clang matches them for compatibility. + // NOTE: GCC actually returns f128 indirectly but will hopefully change. + // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115054#c8. 
return ABIArgInfo::getDirect(llvm::FixedVectorType::get( llvm::Type::getInt64Ty(getVMContext()), 2)); diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index eca96c1cce7f7..e998c94aeacd1 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -226,10 +226,7 @@ std::string CUIDOptions::getCUID(StringRef InputFile, else if (UseCUID == Kind::Hash) { llvm::MD5 Hasher; llvm::MD5::MD5Result Hash; - SmallString<256> RealPath; - llvm::sys::fs::real_path(InputFile, RealPath, - /*expand_tilde=*/true); - Hasher.update(RealPath); + Hasher.update(InputFile); for (auto *A : Args) { if (A->getOption().matches(options::OPT_INPUT)) continue; diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index 294f637ef6515..6e75001585c61 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -30,7 +30,8 @@ static const SanitizerMask NeedsUbsanRt = SanitizerKind::Undefined | SanitizerKind::Integer | SanitizerKind::LocalBounds | SanitizerKind::ImplicitConversion | SanitizerKind::Nullability | SanitizerKind::CFI | - SanitizerKind::FloatDivideByZero | SanitizerKind::ObjCCast; + SanitizerKind::FloatDivideByZero | SanitizerKind::ObjCCast | + SanitizerKind::Vptr; static const SanitizerMask NeedsUbsanCxxRt = SanitizerKind::Vptr | SanitizerKind::CFI; static const SanitizerMask NotAllowedWithTrap = SanitizerKind::Vptr; @@ -53,11 +54,12 @@ static const SanitizerMask SupportsCoverage = SanitizerKind::FuzzerNoLink | SanitizerKind::FloatDivideByZero | SanitizerKind::SafeStack | SanitizerKind::ShadowCallStack | SanitizerKind::Thread | SanitizerKind::ObjCCast | SanitizerKind::KCFI | - SanitizerKind::NumericalStability; + SanitizerKind::NumericalStability | SanitizerKind::Vptr; static const SanitizerMask RecoverableByDefault = SanitizerKind::Undefined | SanitizerKind::Integer | SanitizerKind::ImplicitConversion | SanitizerKind::Nullability | - SanitizerKind::FloatDivideByZero | 
SanitizerKind::ObjCCast; + SanitizerKind::FloatDivideByZero | SanitizerKind::ObjCCast | + SanitizerKind::Vptr; static const SanitizerMask Unrecoverable = SanitizerKind::Unreachable | SanitizerKind::Return; static const SanitizerMask AlwaysRecoverable = SanitizerKind::KernelAddress | @@ -65,11 +67,12 @@ static const SanitizerMask AlwaysRecoverable = SanitizerKind::KernelAddress | SanitizerKind::KCFI; static const SanitizerMask NeedsLTO = SanitizerKind::CFI; static const SanitizerMask TrappingSupported = - (SanitizerKind::Undefined & ~SanitizerKind::Vptr) | SanitizerKind::Integer | + SanitizerKind::Undefined | SanitizerKind::Integer | SanitizerKind::ImplicitConversion | SanitizerKind::Nullability | SanitizerKind::LocalBounds | SanitizerKind::CFI | SanitizerKind::FloatDivideByZero | SanitizerKind::ObjCCast; -static const SanitizerMask MergeDefault = SanitizerKind::Undefined; +static const SanitizerMask MergeDefault = + SanitizerKind::Undefined | SanitizerKind::Vptr; static const SanitizerMask TrappingDefault = SanitizerKind::CFI | SanitizerKind::LocalBounds; static const SanitizerMask CFIClasses = @@ -195,8 +198,8 @@ static void addDefaultIgnorelists(const Driver &D, SanitizerMask Kinds, {"dfsan_abilist.txt", SanitizerKind::DataFlow}, {"cfi_ignorelist.txt", SanitizerKind::CFI}, {"ubsan_ignorelist.txt", - SanitizerKind::Undefined | SanitizerKind::Integer | - SanitizerKind::Nullability | + SanitizerKind::Undefined | SanitizerKind::Vptr | + SanitizerKind::Integer | SanitizerKind::Nullability | SanitizerKind::FloatDivideByZero}}; for (auto BL : Ignorelists) { diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp index 3aee540d501be..51454de1b9dcc 100644 --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -215,7 +215,8 @@ bool arm::isHardTPSupported(const llvm::Triple &Triple) { // Select mode for reading thread pointer (-mtp=soft/cp15). 
arm::ReadTPMode arm::getReadTPMode(const Driver &D, const ArgList &Args, const llvm::Triple &Triple, bool ForAS) { - if (Arg *A = Args.getLastArg(options::OPT_mtp_mode_EQ)) { + Arg *A = Args.getLastArg(options::OPT_mtp_mode_EQ); + if (A && A->getValue() != StringRef("auto")) { arm::ReadTPMode ThreadPointer = llvm::StringSwitch(A->getValue()) .Case("cp15", ReadTPMode::TPIDRURO) @@ -239,7 +240,7 @@ arm::ReadTPMode arm::getReadTPMode(const Driver &D, const ArgList &Args, D.Diag(diag::err_drv_invalid_mtp) << A->getAsString(Args); return ReadTPMode::Invalid; } - return ReadTPMode::Soft; + return (isHardTPSupported(Triple) ? ReadTPMode::TPIDRURO : ReadTPMode::Soft); } void arm::setArchNameInTriple(const Driver &D, const ArgList &Args, @@ -574,12 +575,14 @@ llvm::ARM::FPUKind arm::getARMTargetFeatures(const Driver &D, A->ignoreTargetSpecific(); } - if (getReadTPMode(D, Args, Triple, ForAS) == ReadTPMode::TPIDRURW) + arm::ReadTPMode TPMode = getReadTPMode(D, Args, Triple, ForAS); + + if (TPMode == ReadTPMode::TPIDRURW) Features.push_back("+read-tp-tpidrurw"); - if (getReadTPMode(D, Args, Triple, ForAS) == ReadTPMode::TPIDRURO) - Features.push_back("+read-tp-tpidruro"); - if (getReadTPMode(D, Args, Triple, ForAS) == ReadTPMode::TPIDRPRW) + else if (TPMode == ReadTPMode::TPIDRPRW) Features.push_back("+read-tp-tpidrprw"); + else if (TPMode == ReadTPMode::TPIDRURO) + Features.push_back("+read-tp-tpidruro"); const Arg *ArchArg = Args.getLastArg(options::OPT_march_EQ); const Arg *CPUArg = Args.getLastArg(options::OPT_mcpu_EQ); diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 75f126965e0ac..e67997314da36 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/Option/ArgList.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/MemProf.h" #include "llvm/Support/Path.h" #include "llvm/Support/ScopedPrinter.h" 
#include "llvm/Support/Threading.h" @@ -1617,6 +1618,12 @@ void DarwinClang::AddLinkRuntimeLibArgs(const ArgList &Args, } } + if (Sanitize.needsMemProfRt()) + if (hasExportSymbolDirective(Args)) + addExportedSymbol( + CmdArgs, + llvm::memprof::getMemprofOptionsSymbolDarwinLinkageName().data()); + const XRayArgs &XRay = getXRayArgs(); if (XRay.needsXRayRt()) { AddLinkRuntimeLib(Args, CmdArgs, "xray"); diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 45c26cf8c3159..d4fea633d0edf 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -60,6 +60,8 @@ void Flang::addFortranDialectOptions(const ArgList &Args, options::OPT_frealloc_lhs, options::OPT_fno_realloc_lhs, options::OPT_fsave_main_program, + options::OPT_fd_lines_as_code, + options::OPT_fd_lines_as_comments, options::OPT_fno_save_main_program}); } diff --git a/clang/lib/Driver/ToolChains/OHOS.cpp b/clang/lib/Driver/ToolChains/OHOS.cpp index 6e1a09ae908b2..e213c695a9fef 100644 --- a/clang/lib/Driver/ToolChains/OHOS.cpp +++ b/clang/lib/Driver/ToolChains/OHOS.cpp @@ -111,6 +111,8 @@ std::string OHOS::getMultiarchTriple(const llvm::Triple &T) const { return "x86_64-linux-ohos"; case llvm::Triple::aarch64: return "aarch64-linux-ohos"; + case llvm::Triple::loongarch64: + return "loongarch64-linux-ohos"; } return T.str(); } @@ -368,7 +370,9 @@ void OHOS::addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const { CmdArgs.push_back("-z"); CmdArgs.push_back("relro"); CmdArgs.push_back("-z"); - CmdArgs.push_back("max-page-size=4096"); + CmdArgs.push_back(getArch() == llvm::Triple::loongarch64 + ? 
"max-page-size=16384" + : "max-page-size=4096"); // .gnu.hash section is not compatible with the MIPS target if (getArch() != llvm::Triple::mipsel) CmdArgs.push_back("--hash-style=both"); diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp index 93f17a03c580f..cd12f2ae5a6de 100644 --- a/clang/lib/Driver/ToolChains/WebAssembly.cpp +++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp @@ -69,6 +69,18 @@ static bool TargetBuildsComponents(const llvm::Triple &TargetTriple) { TargetTriple.getOSName() != "wasi"; } +static bool WantsPthread(const llvm::Triple &Triple, const ArgList &Args) { + bool WantsPthread = + Args.hasFlag(options::OPT_pthread, options::OPT_no_pthread, false); + + // If the WASI environment is "threads" then enable pthreads support + // without requiring -pthread, in order to prevent user error + if (Triple.isOSWASI() && Triple.getEnvironmentName() == "threads") + WantsPthread = true; + + return WantsPthread; +} + void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, @@ -150,14 +162,14 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); - if (Args.hasArg(options::OPT_pthread)) + if (WantsPthread(ToolChain.getTriple(), Args)) CmdArgs.push_back("--shared-memory"); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { if (ToolChain.ShouldLinkCXXStdlib(Args)) ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); - if (Args.hasArg(options::OPT_pthread)) + if (WantsPthread(ToolChain.getTriple(), Args)) CmdArgs.push_back("-lpthread"); CmdArgs.push_back("-lc"); @@ -292,8 +304,7 @@ void WebAssembly::addClangTargetOptions(const ArgList &DriverArgs, CC1Args.push_back("-fno-use-init-array"); // '-pthread' implies atomics, bulk-memory, mutable-globals, and sign-ext - if (DriverArgs.hasFlag(options::OPT_pthread, options::OPT_no_pthread, - false)) { + if 
(WantsPthread(getTriple(), DriverArgs)) { if (DriverArgs.hasFlag(options::OPT_mno_atomics, options::OPT_matomics, false)) getDriver().Diag(diag::err_drv_argument_not_allowed_with) diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 972dceb697a8b..1969f4297b211 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -628,6 +628,10 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { // name. !Style.isJavaScript() && Previous.isNot(tok::kw_template) && CurrentState.BreakBeforeParameter) { + for (const auto *Tok = &Previous; Tok; Tok = Tok->Previous) + if (Tok->FirstAfterPPLine || Tok->is(TT_LineComment)) + return false; + return true; } diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 92678a031178a..b5f1241321891 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -3547,7 +3547,8 @@ tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code, return sortJavaScriptImports(Style, Code, Ranges, FileName); if (Style.Language == FormatStyle::LanguageKind::LK_Java) return sortJavaImports(Style, Code, Ranges, FileName, Replaces); - sortCppIncludes(Style, Code, Ranges, FileName, Replaces, Cursor); + if (Style.isCpp()) + sortCppIncludes(Style, Code, Ranges, FileName, Replaces, Cursor); return Replaces; } @@ -3941,34 +3942,42 @@ tooling::Replacements sortUsingDeclarations(const FormatStyle &Style, LangOptions getFormattingLangOpts(const FormatStyle &Style) { LangOptions LangOpts; - FormatStyle::LanguageStandard LexingStd = Style.Standard; - if (LexingStd == FormatStyle::LS_Auto) - LexingStd = FormatStyle::LS_Latest; - if (LexingStd == FormatStyle::LS_Latest) + auto LexingStd = Style.Standard; + if (LexingStd == FormatStyle::LS_Auto || LexingStd == FormatStyle::LS_Latest) LexingStd = FormatStyle::LS_Cpp20; - LangOpts.CPlusPlus = 1; - LangOpts.CPlusPlus11 = LexingStd >= FormatStyle::LS_Cpp11; - 
LangOpts.CPlusPlus14 = LexingStd >= FormatStyle::LS_Cpp14; - LangOpts.CPlusPlus17 = LexingStd >= FormatStyle::LS_Cpp17; - LangOpts.CPlusPlus20 = LexingStd >= FormatStyle::LS_Cpp20; - LangOpts.Char8 = LexingStd >= FormatStyle::LS_Cpp20; + + const bool SinceCpp11 = LexingStd >= FormatStyle::LS_Cpp11; + const bool SinceCpp20 = LexingStd >= FormatStyle::LS_Cpp20; + + switch (Style.Language) { + case FormatStyle::LK_C: + LangOpts.C17 = 1; + break; + case FormatStyle::LK_Cpp: + case FormatStyle::LK_ObjC: + LangOpts.CXXOperatorNames = 1; + LangOpts.CPlusPlus11 = SinceCpp11; + LangOpts.CPlusPlus14 = LexingStd >= FormatStyle::LS_Cpp14; + LangOpts.CPlusPlus17 = LexingStd >= FormatStyle::LS_Cpp17; + LangOpts.CPlusPlus20 = SinceCpp20; + [[fallthrough]]; + default: + LangOpts.CPlusPlus = 1; + } + + LangOpts.Char8 = SinceCpp20; // Turning on digraphs in standards before C++0x is error-prone, because e.g. // the sequence "<::" will be unconditionally treated as "[:". // Cf. Lexer::LexTokenInternal. - LangOpts.Digraphs = LexingStd >= FormatStyle::LS_Cpp11; + LangOpts.Digraphs = SinceCpp11; LangOpts.LineComment = 1; - - const auto Language = Style.Language; - LangOpts.C17 = Language == FormatStyle::LK_C; - LangOpts.CXXOperatorNames = - Language == FormatStyle::LK_Cpp || Language == FormatStyle::LK_ObjC; - LangOpts.Bool = 1; LangOpts.ObjC = 1; LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally. LangOpts.DeclSpecKeyword = 1; // To get __declspec. LangOpts.C99 = 1; // To get kw_restrict for non-underscore-prefixed restrict. + return LangOpts; } diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index 02429970599c0..77935e75d4b4c 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -594,6 +594,9 @@ struct FormatToken { /// Has "\n\f\n" or "\n\f\r\n" before TokenText. bool HasFormFeedBefore = false; + /// Is the first token after a preprocessor line. 
+ bool FirstAfterPPLine = false; + /// Number of optional braces to be inserted after this token: /// -1: a single left brace /// 0: no braces diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 3a49650d95ba4..08539de405c67 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -4845,16 +4845,11 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, return Style.SpaceBeforeParensOptions.AfterControlStatements || spaceRequiredBeforeParens(Right); } - if (Left.isOneOf(tok::kw_new, tok::kw_delete)) { - return ((!Line.MightBeFunctionDecl || !BeforeLeft) && - Style.SpaceBeforeParens != FormatStyle::SBPO_Never) || - spaceRequiredBeforeParens(Right); - } - - if (Left.is(tok::r_square) && Left.MatchingParen && - Left.MatchingParen->Previous && - Left.MatchingParen->Previous->is(tok::kw_delete)) { - return (Style.SpaceBeforeParens != FormatStyle::SBPO_Never) || + if (Left.isOneOf(tok::kw_new, tok::kw_delete) || + (Left.is(tok::r_square) && Left.MatchingParen && + Left.MatchingParen->Previous && + Left.MatchingParen->Previous->is(tok::kw_delete))) { + return Style.SpaceBeforeParens != FormatStyle::SBPO_Never || spaceRequiredBeforeParens(Right); } } diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index 14e984529d640..000a5105ca407 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -116,36 +116,18 @@ class LevelIndentTracker { Style.isCSharp()) { return 0; } - - auto IsAccessModifier = [&](const FormatToken &RootToken) { - if (Line.Type == LT_AccessModifier || RootToken.isObjCAccessSpecifier()) - return true; - - const auto *Next = RootToken.Next; - - // Handle Qt signals. 
- if (RootToken.isOneOf(Keywords.kw_signals, Keywords.kw_qsignals) && - Next && Next->is(tok::colon)) { - return true; - } - - if (Next && Next->isOneOf(Keywords.kw_slots, Keywords.kw_qslots) && - Next->Next && Next->Next->is(tok::colon)) { - return true; - } - - // Handle malformed access specifier e.g. 'private' without trailing ':'. - return !Next && RootToken.isAccessSpecifier(false); - }; - - if (IsAccessModifier(*Line.First)) { + const auto &RootToken = *Line.First; + if (Line.Type == LT_AccessModifier || + RootToken.isAccessSpecifier(/*ColonRequired=*/false) || + RootToken.isObjCAccessSpecifier() || + (RootToken.isOneOf(Keywords.kw_signals, Keywords.kw_qsignals) && + RootToken.Next && RootToken.Next->is(tok::colon))) { // The AccessModifierOffset may be overridden by IndentAccessModifiers, // in which case we take a negative value of the IndentWidth to simulate // the upper indent level. return Style.IndentAccessModifiers ? -Style.IndentWidth : Style.AccessModifierOffset; } - return 0; } diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 16f19e955bf55..6854e224c2631 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -119,7 +119,7 @@ class ScopedLineState { assert(Parser.Line->Tokens.empty()); Parser.Line = std::move(PreBlockLine); if (Parser.CurrentLines == &Parser.PreprocessorDirectives) - Parser.MustBreakBeforeNextToken = true; + Parser.AtEndOfPPLine = true; Parser.CurrentLines = OriginalLines; } @@ -158,8 +158,8 @@ UnwrappedLineParser::UnwrappedLineParser( ArrayRef Tokens, UnwrappedLineConsumer &Callback, llvm::SpecificBumpPtrAllocator &Allocator, IdentifierTable &IdentTable) - : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), - CurrentLines(&Lines), Style(Style), IsCpp(Style.isCpp()), + : Line(new UnwrappedLine), AtEndOfPPLine(false), CurrentLines(&Lines), + Style(Style), IsCpp(Style.isCpp()), LangOpts(getFormattingLangOpts(Style)), 
Keywords(Keywords), CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), @@ -180,7 +180,7 @@ void UnwrappedLineParser::reset() { Line.reset(new UnwrappedLine); CommentsBeforeNextToken.clear(); FormatTok = nullptr; - MustBreakBeforeNextToken = false; + AtEndOfPPLine = false; IsDecltypeAutoFunction = false; PreprocessorDirectives.clear(); CurrentLines = &Lines; @@ -3386,75 +3386,15 @@ void UnwrappedLineParser::parseSwitch(bool IsExpr) { NestedTooDeep.pop_back(); } -// Operators that can follow a C variable. -static bool isCOperatorFollowingVar(tok::TokenKind Kind) { - switch (Kind) { - case tok::ampamp: - case tok::ampequal: - case tok::arrow: - case tok::caret: - case tok::caretequal: - case tok::comma: - case tok::ellipsis: - case tok::equal: - case tok::equalequal: - case tok::exclaim: - case tok::exclaimequal: - case tok::greater: - case tok::greaterequal: - case tok::greatergreater: - case tok::greatergreaterequal: - case tok::l_paren: - case tok::l_square: - case tok::less: - case tok::lessequal: - case tok::lessless: - case tok::lesslessequal: - case tok::minus: - case tok::minusequal: - case tok::minusminus: - case tok::percent: - case tok::percentequal: - case tok::period: - case tok::pipe: - case tok::pipeequal: - case tok::pipepipe: - case tok::plus: - case tok::plusequal: - case tok::plusplus: - case tok::question: - case tok::r_brace: - case tok::r_paren: - case tok::r_square: - case tok::semi: - case tok::slash: - case tok::slashequal: - case tok::star: - case tok::starequal: - return true; - default: - return false; - } -} - void UnwrappedLineParser::parseAccessSpecifier() { - FormatToken *AccessSpecifierCandidate = FormatTok; nextToken(); // Understand Qt's slots. if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) nextToken(); // Otherwise, we don't know what it is, and we'd better keep the next token. 
- if (FormatTok->is(tok::colon)) { + if (FormatTok->is(tok::colon)) nextToken(); - addUnwrappedLine(); - } else if (FormatTok->isNot(tok::coloncolon) && - !isCOperatorFollowingVar(FormatTok->Tok.getKind())) { - // Not a variable name nor namespace name. - addUnwrappedLine(); - } else if (AccessSpecifierCandidate) { - // Consider the access specifier to be a C identifier. - AccessSpecifierCandidate->Tok.setKind(tok::identifier); - } + addUnwrappedLine(); } /// \brief Parses a requires, decides if it is a clause or an expression. @@ -5090,10 +5030,12 @@ UnwrappedLineParser::parseMacroCall() { void UnwrappedLineParser::pushToken(FormatToken *Tok) { Line->Tokens.push_back(UnwrappedLineNode(Tok)); - if (MustBreakBeforeNextToken) { - Line->Tokens.back().Tok->MustBreakBefore = true; - Line->Tokens.back().Tok->MustBreakBeforeFinalized = true; - MustBreakBeforeNextToken = false; + if (AtEndOfPPLine) { + auto &Tok = *Line->Tokens.back().Tok; + Tok.MustBreakBefore = true; + Tok.MustBreakBeforeFinalized = true; + Tok.FirstAfterPPLine = true; + AtEndOfPPLine = false; } } diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h index 08bff2748eb8f..87650c2756cd1 100644 --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -298,8 +298,11 @@ class UnwrappedLineParser { // Since the next token might already be in a new unwrapped line, we need to // store the comments belonging to that token. SmallVector CommentsBeforeNextToken; + FormatToken *FormatTok = nullptr; - bool MustBreakBeforeNextToken; + + // Has just finished parsing a preprocessor line. + bool AtEndOfPPLine; // The parsed lines. Only added to through \c CurrentLines. 
SmallVector Lines; diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp index 1ea4a2e9e88cf..60e103e643e27 100644 --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -403,7 +403,8 @@ class DefaultTemplateInstCallback : public TemplateInstantiationCallback { } private: - static std::string toString(CodeSynthesisContext::SynthesisKind Kind) { + static std::optional + toString(CodeSynthesisContext::SynthesisKind Kind) { switch (Kind) { case CodeSynthesisContext::TemplateInstantiation: return "TemplateInstantiation"; @@ -461,8 +462,10 @@ class DefaultTemplateInstCallback : public TemplateInstantiationCallback { return "TypeAliasTemplateInstantiation"; case CodeSynthesisContext::PartialOrderingTTP: return "PartialOrderingTTP"; + case CodeSynthesisContext::CheckTemplateParameter: + return std::nullopt; } - return ""; + return std::nullopt; } template @@ -470,12 +473,14 @@ class DefaultTemplateInstCallback : public TemplateInstantiationCallback { const CodeSynthesisContext &Inst) { std::string YAML; { + std::optional Entry = + getTemplightEntry(TheSema, Inst); + if (!Entry) + return; llvm::raw_string_ostream OS(YAML); llvm::yaml::Output YO(OS); - TemplightEntry Entry = - getTemplightEntry(TheSema, Inst); llvm::yaml::EmptyContext Context; - llvm::yaml::yamlize(YO, Entry, true, Context); + llvm::yaml::yamlize(YO, *Entry, true, Context); } Out << "---" << YAML << "\n"; } @@ -555,10 +560,13 @@ class DefaultTemplateInstCallback : public TemplateInstantiationCallback { } template - static TemplightEntry getTemplightEntry(const Sema &TheSema, - const CodeSynthesisContext &Inst) { + static std::optional + getTemplightEntry(const Sema &TheSema, const CodeSynthesisContext &Inst) { TemplightEntry Entry; - Entry.Kind = toString(Inst.Kind); + std::optional Kind = toString(Inst.Kind); + if (!Kind) + return std::nullopt; + Entry.Kind = *Kind; Entry.Event = BeginInstantiation ? 
"Begin" : "End"; llvm::raw_string_ostream OS(Entry.Name); printEntryName(TheSema, Inst.Entity, OS); diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index e1dc728558def..1a816cb6269d4 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -664,7 +664,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts, Builder.defineMacro("__cpp_lambdas", "200907L"); Builder.defineMacro("__cpp_constexpr", LangOpts.CPlusPlus26 ? "202406L" : LangOpts.CPlusPlus23 ? "202211L" - : LangOpts.CPlusPlus20 ? "201907L" + : LangOpts.CPlusPlus20 ? "202002L" : LangOpts.CPlusPlus17 ? "201603L" : LangOpts.CPlusPlus14 ? "201304L" : "200704"); diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index a3a505bcb7f88..e5bf8f35f7d52 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -86,6 +86,7 @@ set(hlsl_h ) set(hlsl_subdir_files hlsl/hlsl_basic_types.h + hlsl/hlsl_alias_intrinsics.h hlsl/hlsl_intrinsics.h hlsl/hlsl_detail.h ) diff --git a/clang/lib/Headers/__clang_hip_libdevice_declares.h b/clang/lib/Headers/__clang_hip_libdevice_declares.h index f15198b3d9f93..fa8d918248dd0 100644 --- a/clang/lib/Headers/__clang_hip_libdevice_declares.h +++ b/clang/lib/Headers/__clang_hip_libdevice_declares.h @@ -14,6 +14,8 @@ #include "hip/hip_version.h" #endif // __has_include("hip/hip_version.h") +#define __PRIVATE_AS __attribute__((opencl_private)) + #ifdef __cplusplus extern "C" { #endif @@ -55,8 +57,7 @@ __device__ __attribute__((const)) float __ocml_fmax_f32(float, float); __device__ __attribute__((const)) float __ocml_fmin_f32(float, float); __device__ __attribute__((const)) __device__ float __ocml_fmod_f32(float, float); -__device__ float __ocml_frexp_f32(float, - __attribute__((address_space(5))) int *); +__device__ float __ocml_frexp_f32(float, __PRIVATE_AS int *); __device__ __attribute__((const)) float 
__ocml_hypot_f32(float, float); __device__ __attribute__((const)) int __ocml_ilogb_f32(float); __device__ __attribute__((const)) int __ocml_isfinite_f32(float); @@ -74,8 +75,7 @@ __device__ __attribute__((pure)) float __ocml_native_log2_f32(float); __device__ __attribute__((const)) float __ocml_logb_f32(float); __device__ __attribute__((pure)) float __ocml_log_f32(float); __device__ __attribute__((pure)) float __ocml_native_log_f32(float); -__device__ float __ocml_modf_f32(float, - __attribute__((address_space(5))) float *); +__device__ float __ocml_modf_f32(float, __PRIVATE_AS float *); __device__ __attribute__((const)) float __ocml_nearbyint_f32(float); __device__ __attribute__((const)) float __ocml_nextafter_f32(float, float); __device__ __attribute__((const)) float __ocml_len3_f32(float, float, float); @@ -87,8 +87,7 @@ __device__ __attribute__((pure)) float __ocml_pow_f32(float, float); __device__ __attribute__((pure)) float __ocml_pown_f32(float, int); __device__ __attribute__((pure)) float __ocml_rcbrt_f32(float); __device__ __attribute__((const)) float __ocml_remainder_f32(float, float); -__device__ float __ocml_remquo_f32(float, float, - __attribute__((address_space(5))) int *); +__device__ float __ocml_remquo_f32(float, float, __PRIVATE_AS int *); __device__ __attribute__((const)) float __ocml_rhypot_f32(float, float); __device__ __attribute__((const)) float __ocml_rint_f32(float); __device__ __attribute__((const)) float __ocml_rlen3_f32(float, float, float); @@ -99,10 +98,8 @@ __device__ __attribute__((pure)) float __ocml_rsqrt_f32(float); __device__ __attribute__((const)) float __ocml_scalb_f32(float, float); __device__ __attribute__((const)) float __ocml_scalbn_f32(float, int); __device__ __attribute__((const)) int __ocml_signbit_f32(float); -__device__ float __ocml_sincos_f32(float, - __attribute__((address_space(5))) float *); -__device__ float __ocml_sincospi_f32(float, - __attribute__((address_space(5))) float *); +__device__ float 
__ocml_sincos_f32(float, __PRIVATE_AS float *); +__device__ float __ocml_sincospi_f32(float, __PRIVATE_AS float *); __device__ float __ocml_sin_f32(float); __device__ float __ocml_native_sin_f32(float); __device__ __attribute__((pure)) float __ocml_sinh_f32(float); @@ -176,8 +173,7 @@ __device__ __attribute__((const)) double __ocml_fma_f64(double, double, double); __device__ __attribute__((const)) double __ocml_fmax_f64(double, double); __device__ __attribute__((const)) double __ocml_fmin_f64(double, double); __device__ __attribute__((const)) double __ocml_fmod_f64(double, double); -__device__ double __ocml_frexp_f64(double, - __attribute__((address_space(5))) int *); +__device__ double __ocml_frexp_f64(double, __PRIVATE_AS int *); __device__ __attribute__((const)) double __ocml_hypot_f64(double, double); __device__ __attribute__((const)) int __ocml_ilogb_f64(double); __device__ __attribute__((const)) int __ocml_isfinite_f64(double); @@ -192,8 +188,7 @@ __device__ __attribute__((pure)) double __ocml_log1p_f64(double); __device__ __attribute__((pure)) double __ocml_log2_f64(double); __device__ __attribute__((const)) double __ocml_logb_f64(double); __device__ __attribute__((pure)) double __ocml_log_f64(double); -__device__ double __ocml_modf_f64(double, - __attribute__((address_space(5))) double *); +__device__ double __ocml_modf_f64(double, __PRIVATE_AS double *); __device__ __attribute__((const)) double __ocml_nearbyint_f64(double); __device__ __attribute__((const)) double __ocml_nextafter_f64(double, double); __device__ __attribute__((const)) double __ocml_len3_f64(double, double, @@ -206,8 +201,7 @@ __device__ __attribute__((pure)) double __ocml_pow_f64(double, double); __device__ __attribute__((pure)) double __ocml_pown_f64(double, int); __device__ __attribute__((pure)) double __ocml_rcbrt_f64(double); __device__ __attribute__((const)) double __ocml_remainder_f64(double, double); -__device__ double __ocml_remquo_f64(double, double, - 
__attribute__((address_space(5))) int *); +__device__ double __ocml_remquo_f64(double, double, __PRIVATE_AS int *); __device__ __attribute__((const)) double __ocml_rhypot_f64(double, double); __device__ __attribute__((const)) double __ocml_rint_f64(double); __device__ __attribute__((const)) double __ocml_rlen3_f64(double, double, @@ -219,10 +213,8 @@ __device__ __attribute__((pure)) double __ocml_rsqrt_f64(double); __device__ __attribute__((const)) double __ocml_scalb_f64(double, double); __device__ __attribute__((const)) double __ocml_scalbn_f64(double, int); __device__ __attribute__((const)) int __ocml_signbit_f64(double); -__device__ double __ocml_sincos_f64(double, - __attribute__((address_space(5))) double *); -__device__ double -__ocml_sincospi_f64(double, __attribute__((address_space(5))) double *); +__device__ double __ocml_sincos_f64(double, __PRIVATE_AS double *); +__device__ double __ocml_sincospi_f64(double, __PRIVATE_AS double *); __device__ double __ocml_sin_f64(double); __device__ __attribute__((pure)) double __ocml_sinh_f64(double); __device__ double __ocml_sinpi_f64(double); diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h index 8468751d9de26..f6c06eaf4afe0 100644 --- a/clang/lib/Headers/__clang_hip_math.h +++ b/clang/lib/Headers/__clang_hip_math.h @@ -33,6 +33,9 @@ #define __DEVICE__ static __device__ inline __attribute__((always_inline)) #endif +#pragma push_macro("__PRIVATE_AS") + +#define __PRIVATE_AS __attribute__((opencl_private)) // Device library provides fast low precision and slow full-recision // implementations for some functions. 
Which one gets selected depends on // __CLANG_GPU_APPROX_TRANSCENDENTALS__ which gets defined by clang if @@ -389,7 +392,7 @@ __DEVICE__ float erfinvf(float __x) { return __ocml_erfinv_f32(__x); } __DEVICE__ -float exp10f(float __x) { return __ocml_exp10_f32(__x); } +float exp10f(float __x) { return __builtin_exp10f(__x); } __DEVICE__ float exp2f(float __x) { return __builtin_exp2f(__x); } @@ -492,13 +495,13 @@ __DEVICE__ float log1pf(float __x) { return __ocml_log1p_f32(__x); } __DEVICE__ -float log2f(float __x) { return __FAST_OR_SLOW(__log2f, __ocml_log2_f32)(__x); } +float log2f(float __x) { return __FAST_OR_SLOW(__log2f, __builtin_log2f)(__x); } __DEVICE__ float logbf(float __x) { return __ocml_logb_f32(__x); } __DEVICE__ -float logf(float __x) { return __FAST_OR_SLOW(__logf, __ocml_log_f32)(__x); } +float logf(float __x) { return __FAST_OR_SLOW(__logf, __builtin_logf)(__x); } __DEVICE__ long int lrintf(float __x) { return __builtin_rintf(__x); } @@ -512,8 +515,7 @@ float modff(float __x, float *__iptr) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif - float __r = - __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); + float __r = __ocml_modf_f32(__x, (__PRIVATE_AS float *)&__tmp); *__iptr = __tmp; return __r; } @@ -595,8 +597,7 @@ float remquof(float __x, float __y, int *__quo) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif - float __r = __ocml_remquo_f32( - __x, __y, (__attribute__((address_space(5))) int *)&__tmp); + float __r = __ocml_remquo_f32(__x, __y, (__PRIVATE_AS int *)&__tmp); *__quo = __tmp; return __r; @@ -638,8 +639,11 @@ float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); } __DEVICE__ float scalblnf(float __x, long int __n) { - return (__n < INT_MAX) ? 
__builtin_amdgcn_ldexpf(__x, __n) - : __ocml_scalb_f32(__x, __n); + if (__n > INT_MAX) + __n = INT_MAX; + else if (__n < INT_MIN) + __n = INT_MIN; + return __builtin_ldexpf(__x, (int)__n); } __DEVICE__ @@ -657,8 +661,7 @@ void sincosf(float __x, float *__sinptr, float *__cosptr) { #ifdef __CLANG_CUDA_APPROX_TRANSCENDENTALS__ __sincosf(__x, __sinptr, __cosptr); #else - *__sinptr = - __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); + *__sinptr = __ocml_sincos_f32(__x, (__PRIVATE_AS float *)&__tmp); *__cosptr = __tmp; #endif } @@ -669,8 +672,7 @@ void sincospif(float __x, float *__sinptr, float *__cosptr) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif - *__sinptr = __ocml_sincospi_f32( - __x, (__attribute__((address_space(5))) float *)&__tmp); + *__sinptr = __ocml_sincospi_f32(__x, (__PRIVATE_AS float *)&__tmp); *__cosptr = __tmp; } @@ -913,8 +915,7 @@ double modf(double __x, double *__iptr) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif - double __r = - __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp); + double __r = __ocml_modf_f64(__x, (__PRIVATE_AS double *)&__tmp); *__iptr = __tmp; return __r; @@ -1004,8 +1005,7 @@ double remquo(double __x, double __y, int *__quo) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif - double __r = __ocml_remquo_f64( - __x, __y, (__attribute__((address_space(5))) int *)&__tmp); + double __r = __ocml_remquo_f64(__x, __y, (__PRIVATE_AS int *)&__tmp); *__quo = __tmp; return __r; @@ -1047,8 +1047,11 @@ double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); } __DEVICE__ double scalbln(double __x, long int __n) { - return (__n < INT_MAX) ? 
__builtin_amdgcn_ldexp(__x, __n) - : __ocml_scalb_f64(__x, __n); + if (__n > INT_MAX) + __n = INT_MAX; + else if (__n < INT_MIN) + __n = INT_MIN; + return __builtin_ldexp(__x, (int)__n); } __DEVICE__ double scalbn(double __x, int __n) { return __builtin_amdgcn_ldexp(__x, __n); } @@ -1065,8 +1068,7 @@ void sincos(double __x, double *__sinptr, double *__cosptr) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif - *__sinptr = __ocml_sincos_f64( - __x, (__attribute__((address_space(5))) double *)&__tmp); + *__sinptr = __ocml_sincos_f64(__x, (__PRIVATE_AS double *)&__tmp); *__cosptr = __tmp; } @@ -1076,8 +1078,7 @@ void sincospi(double __x, double *__sinptr, double *__cosptr) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif - *__sinptr = __ocml_sincospi_f64( - __x, (__attribute__((address_space(5))) double *)&__tmp); + *__sinptr = __ocml_sincospi_f64(__x, (__PRIVATE_AS double *)&__tmp); *__cosptr = __tmp; } @@ -1322,6 +1323,7 @@ __host__ inline static int max(int __arg1, int __arg2) { #endif #pragma pop_macro("__DEVICE__") +#pragma pop_macro("__PRIVATE_AS") #pragma pop_macro("__RETURN_TYPE") #pragma pop_macro("__FAST_OR_SLOW") diff --git a/clang/lib/Headers/__clang_hip_runtime_wrapper.h b/clang/lib/Headers/__clang_hip_runtime_wrapper.h index ed1550038e63e..da1e39ac7270e 100644 --- a/clang/lib/Headers/__clang_hip_runtime_wrapper.h +++ b/clang/lib/Headers/__clang_hip_runtime_wrapper.h @@ -125,11 +125,13 @@ typedef __SIZE_TYPE__ size_t; #pragma push_macro("uint64_t") #pragma push_macro("CHAR_BIT") #pragma push_macro("INT_MAX") +#pragma push_macro("INT_MIN") #define NULL (void *)0 #define uint32_t __UINT32_TYPE__ #define uint64_t __UINT64_TYPE__ #define CHAR_BIT __CHAR_BIT__ #define INT_MAX __INTMAX_MAX__ +#define INT_MIN (-__INT_MAX__ - 1) #endif // __HIPCC_RTC__ #include <__clang_hip_libdevice_declares.h> @@ -154,6 +156,7 @@ typedef __SIZE_TYPE__ size_t; #pragma pop_macro("uint64_t") 
#pragma pop_macro("CHAR_BIT") #pragma pop_macro("INT_MAX") +#pragma pop_macro("INT_MIN") #endif // __HIPCC_RTC__ #endif // __HIP__ #endif // __CLANG_HIP_RUNTIME_WRAPPER_H__ diff --git a/clang/lib/Headers/amdgpuintrin.h b/clang/lib/Headers/amdgpuintrin.h index 355e75d0b2d42..15409eacf7716 100644 --- a/clang/lib/Headers/amdgpuintrin.h +++ b/clang/lib/Headers/amdgpuintrin.h @@ -121,7 +121,7 @@ __gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x) { uint32_t __hi = (uint32_t)(__x >> 32ull); uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF); return ((uint64_t)__builtin_amdgcn_readfirstlane(__hi) << 32ull) | - ((uint64_t)__builtin_amdgcn_readfirstlane(__lo)); + ((uint64_t)__builtin_amdgcn_readfirstlane(__lo) & 0xFFFFFFFF); } // Returns a bitmask of threads in the current lane for which \p x is true. @@ -187,7 +187,7 @@ __gpu_match_any_u64(uint64_t __lane_mask, uint64_t __x) { uint64_t __match_mask = 0; bool __done = 0; - while (__gpu_ballot(__lane_mask, __done)) { + while (__gpu_ballot(__lane_mask, !__done)) { if (!__done) { uint64_t __first = __gpu_read_first_lane_u64(__lane_mask, __x); if (__first == __x) { diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h new file mode 100644 index 0000000000000..7573f6e024167 --- /dev/null +++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h @@ -0,0 +1,2797 @@ +//===--- hlsl_alias_intrinsics.h - HLSL alias definitions for intrinsics --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _HLSL_HLSL_ALIAS_INTRINSICS_H_ +#define _HLSL_HLSL_ALIAS_INTRINSICS_H_ + +namespace hlsl { + +// Note: Functions in this file are sorted alphabetically, then grouped by base +// element type, and the element types are sorted by size, then signed integer, +// unsigned integer and floating point. Keeping this ordering consistent will +// help keep this file manageable as it grows. + +#define _HLSL_BUILTIN_ALIAS(builtin) \ + __attribute__((clang_builtin_alias(builtin))) +#define _HLSL_AVAILABILITY(platform, version) \ + __attribute__((availability(platform, introduced = version))) +#define _HLSL_AVAILABILITY_STAGE(platform, version, stage) \ + __attribute__(( \ + availability(platform, introduced = version, environment = stage))) + +#ifdef __HLSL_ENABLE_16_BIT +#define _HLSL_16BIT_AVAILABILITY(platform, version) \ + __attribute__((availability(platform, introduced = version))) +#define _HLSL_16BIT_AVAILABILITY_STAGE(platform, version, stage) \ + __attribute__(( \ + availability(platform, introduced = version, environment = stage))) +#else +#define _HLSL_16BIT_AVAILABILITY(environment, version) +#define _HLSL_16BIT_AVAILABILITY_STAGE(environment, version, stage) +#endif + +#define GEN_VEC_SCALAR_OVERLOADS(FUNC_NAME, BASE_TYPE, AVAIL) \ + GEN_BOTH_OVERLOADS(FUNC_NAME, BASE_TYPE, BASE_TYPE##2, AVAIL) \ + GEN_BOTH_OVERLOADS(FUNC_NAME, BASE_TYPE, BASE_TYPE##3, AVAIL) \ + GEN_BOTH_OVERLOADS(FUNC_NAME, BASE_TYPE, BASE_TYPE##4, AVAIL) + +#define GEN_BOTH_OVERLOADS(FUNC_NAME, BASE_TYPE, VECTOR_TYPE, AVAIL) \ + IF_TRUE_##AVAIL( \ + _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) constexpr VECTOR_TYPE \ + FUNC_NAME(VECTOR_TYPE p0, BASE_TYPE p1) { \ + return __builtin_elementwise_##FUNC_NAME(p0, (VECTOR_TYPE)p1); \ + } \ + IF_TRUE_##AVAIL( \ + _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) constexpr VECTOR_TYPE \ + 
FUNC_NAME(BASE_TYPE p0, VECTOR_TYPE p1) { \ + return __builtin_elementwise_##FUNC_NAME((VECTOR_TYPE)p0, p1); \ + } + +#define IF_TRUE_0(EXPR) +#define IF_TRUE_1(EXPR) EXPR + +//===----------------------------------------------------------------------===// +// abs builtins +//===----------------------------------------------------------------------===// + +/// \fn T abs(T Val) +/// \brief Returns the absolute value of the input value, \a Val. +/// \param Val The input value. + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +int16_t abs(int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +int16_t2 abs(int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +int16_t3 abs(int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +int16_t4 abs(int16_t4); + +_HLSL_AVAILABILITY(shadermodel, 6.2) +constexpr uint16_t abs(uint16_t V) { return V; } +_HLSL_AVAILABILITY(shadermodel, 6.2) +constexpr uint16_t2 abs(uint16_t2 V) { return V; } +_HLSL_AVAILABILITY(shadermodel, 6.2) +constexpr uint16_t3 abs(uint16_t3 V) { return V; } +_HLSL_AVAILABILITY(shadermodel, 6.2) +constexpr uint16_t4 abs(uint16_t4 V) { return V; } +#endif + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +half abs(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +half2 abs(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +half3 abs(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +half4 abs(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +int abs(int); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +int2 abs(int2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +int3 abs(int3); 
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +int4 abs(int4); + +constexpr uint abs(uint V) { return V; } +constexpr uint2 abs(uint2 V) { return V; } +constexpr uint3 abs(uint3 V) { return V; } +constexpr uint4 abs(uint4 V) { return V; } + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +float abs(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +float2 abs(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +float3 abs(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +float4 abs(float4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +int64_t abs(int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +int64_t2 abs(int64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +int64_t3 abs(int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +int64_t4 abs(int64_t4); + +constexpr uint64_t abs(uint64_t V) { return V; } +constexpr uint64_t2 abs(uint64_t2 V) { return V; } +constexpr uint64_t3 abs(uint64_t3 V) { return V; } +constexpr uint64_t4 abs(uint64_t4 V) { return V; } + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +double abs(double); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +double2 abs(double2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +double3 abs(double3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +double4 abs(double4); + +//===----------------------------------------------------------------------===// +// acos builtins +//===----------------------------------------------------------------------===// + +/// \fn T acos(T Val) +/// \brief Returns the arccosine of the input value, \a Val. +/// \param Val The input value. 
+ +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos) +half acos(half); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos) +half2 acos(half2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos) +half3 acos(half3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos) +half4 acos(half4); +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos) +float acos(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos) +float2 acos(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos) +float3 acos(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos) +float4 acos(float4); + +//===----------------------------------------------------------------------===// +// AddUint64 builtins +//===----------------------------------------------------------------------===// + +/// \fn T AddUint64(T a, T b) +/// \brief Implements unsigned 64-bit integer addition using pairs of unsigned +/// 32-bit integers. +/// \param a [in] The first unsigned 32-bit integer pair(s) +/// \param b [in] The second unsigned 32-bit integer pair(s) +/// +/// This function takes one or two pairs (low, high) of unsigned 32-bit integer +/// values and returns pairs (low, high) of unsigned 32-bit integer +/// values representing the result of unsigned 64-bit integer addition. + +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64) +uint32_t2 AddUint64(uint32_t2, uint32_t2); +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64) +uint32_t4 AddUint64(uint32_t4, uint32_t4); + +//===----------------------------------------------------------------------===// +// all builtins +//===----------------------------------------------------------------------===// + +/// \fn bool all(T x) +/// \brief Returns True if all components of the \a x parameter are non-zero; +/// otherwise, false. \param x The input value. 
+ +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(int16_t4); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(uint16_t4); +#endif + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(bool); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(bool2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(bool3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(bool4); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(int); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(int2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(int3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(int4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool 
all(uint4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(float4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(int64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(int64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(uint64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(double); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(double2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(double3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) +bool all(double4); + +//===----------------------------------------------------------------------===// +// and builtins +//===----------------------------------------------------------------------===// + +/// \fn bool and(bool x, bool y) +/// \brief Logically ands two boolean vectors elementwise and produces a bool +/// vector output. 
+ +// TODO: Clean up clang-format marker once we've resolved +// https://github.com/llvm/llvm-project/issues/127851 +// +// clang-format off +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_and) +bool and(bool x, bool y); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_and) +bool2 and(bool2 x, bool2 y); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_and) +bool3 and(bool3 x, bool3 y); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_and) +bool4 and(bool4 x, bool4 y); +// clang-format on + +//===----------------------------------------------------------------------===// +// any builtins +//===----------------------------------------------------------------------===// + +/// \fn bool any(T x) +/// \brief Returns True if any components of the \a x parameter are non-zero; +/// otherwise, false. \param x The input value. + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(int16_t4); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(uint16_t4); +#endif + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) 
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(bool); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(bool2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(bool3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(bool4); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(int); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(int2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(int3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(int4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(uint4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(float4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(int64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(int64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(uint64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(double); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(double2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(double3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) +bool any(double4); + +//===----------------------------------------------------------------------===// +// asdouble builtins +//===----------------------------------------------------------------------===// + +/// \fn 
double asdouble(uint LowBits, uint HighBits) +/// \brief Reinterprets a cast value (two 32-bit values) into a double. +/// \param LowBits The low 32-bit pattern of the input value. +/// \param HighBits The high 32-bit pattern of the input value. + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_asdouble) +double asdouble(uint, uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_asdouble) +double2 asdouble(uint2, uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_asdouble) +double3 asdouble(uint3, uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_asdouble) +double4 asdouble(uint4, uint4); + +//===----------------------------------------------------------------------===// +// asin builtins +//===----------------------------------------------------------------------===// + +/// \fn T asin(T Val) +/// \brief Returns the arcsine of the input value, \a Val. +/// \param Val The input value. + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_asin) +half asin(half); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_asin) +half2 asin(half2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_asin) +half3 asin(half3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_asin) +half4 asin(half4); +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_asin) +float asin(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_asin) +float2 asin(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_asin) +float3 asin(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_asin) +float4 asin(float4); + +//===----------------------------------------------------------------------===// +// atan builtins +//===----------------------------------------------------------------------===// + +/// \fn T atan(T Val) +/// \brief Returns the arctangent of the input value, \a Val. +/// \param Val The input value. 
+ +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan) +half atan(half); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan) +half2 atan(half2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan) +half3 atan(half3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan) +half4 atan(half4); +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan) +float atan(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan) +float2 atan(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan) +float3 atan(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan) +float4 atan(float4); + +//===----------------------------------------------------------------------===// +// atan2 builtins +//===----------------------------------------------------------------------===// + +/// \fn T atan2(T y, T x) +/// \brief Returns the arctangent of y/x, using the signs of the arguments to +/// determine the correct quadrant. +/// \param y The y-coordinate. +/// \param x The x-coordinate. + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan2) +half atan2(half y, half x); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan2) +half2 atan2(half2 y, half2 x); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan2) +half3 atan2(half3 y, half3 x); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan2) +half4 atan2(half4 y, half4 x); +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan2) +float atan2(float y, float x); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan2) +float2 atan2(float2 y, float2 x); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan2) +float3 atan2(float3 y, float3 x); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan2) +float4 atan2(float4 y, float4 x); + +//===----------------------------------------------------------------------===// +// ceil builtins +//===----------------------------------------------------------------------===// + +/// \fn T ceil(T Val) +/// \brief Returns the smallest integer value that is greater than or equal to +/// the input 
value, \a Val. +/// \param Val The input value. + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil) +half ceil(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil) +half2 ceil(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil) +half3 ceil(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil) +half4 ceil(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil) +float ceil(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil) +float2 ceil(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil) +float3 ceil(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil) +float4 ceil(float4); + +//===----------------------------------------------------------------------===// +// clamp builtins +//===----------------------------------------------------------------------===// + +/// \fn T clamp(T X, T Min, T Max) +/// \brief Clamps the specified value \a X to the specified +/// minimum ( \a Min) and maximum ( \a Max) range. +/// \param X A value to clamp. +/// \param Min The specified minimum range. +/// \param Max The specified maximum range. +/// +/// Returns The clamped value for the \a X parameter. +/// For values of -INF or INF, clamp will behave as expected. +/// However for values of NaN, the results are undefined. 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +half clamp(half, half, half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +half2 clamp(half2, half2, half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +half3 clamp(half3, half3, half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +half4 clamp(half4, half4, half4); + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int16_t clamp(int16_t, int16_t, int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int16_t2 clamp(int16_t2, int16_t2, int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int16_t3 clamp(int16_t3, int16_t3, int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int16_t4 clamp(int16_t4, int16_t4, int16_t4); + +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint16_t clamp(uint16_t, uint16_t, uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint16_t2 clamp(uint16_t2, uint16_t2, uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint16_t3 clamp(uint16_t3, uint16_t3, uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint16_t4 clamp(uint16_t4, uint16_t4, uint16_t4); +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int clamp(int, int, int); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int2 clamp(int2, int2, int2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int3 clamp(int3, int3, int3); 
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int4 clamp(int4, int4, int4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint clamp(uint, uint, uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint2 clamp(uint2, uint2, uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint3 clamp(uint3, uint3, uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint4 clamp(uint4, uint4, uint4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int64_t clamp(int64_t, int64_t, int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int64_t2 clamp(int64_t2, int64_t2, int64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int64_t3 clamp(int64_t3, int64_t3, int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int64_t4 clamp(int64_t4, int64_t4, int64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint64_t clamp(uint64_t, uint64_t, uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint64_t2 clamp(uint64_t2, uint64_t2, uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint64_t3 clamp(uint64_t3, uint64_t3, uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint64_t4 clamp(uint64_t4, uint64_t4, uint64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +float clamp(float, float, float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +float2 clamp(float2, float2, float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +float3 clamp(float3, float3, float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +float4 clamp(float4, float4, float4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +double clamp(double, double, double); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +double2 clamp(double2, double2, double2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +double3 clamp(double3, double3, double3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) 
+double4 clamp(double4, double4, double4); + +//===----------------------------------------------------------------------===// +// clip builtins +//===----------------------------------------------------------------------===// + +/// \fn void clip(T Val) +/// \brief Discards the current pixel if the specified value is less than zero. +/// \param Val The input value. + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clip) +void clip(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clip) +void clip(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clip) +void clip(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clip) +void clip(float4); + +//===----------------------------------------------------------------------===// +// cos builtins +//===----------------------------------------------------------------------===// + +/// \fn T cos(T Val) +/// \brief Returns the cosine of the input value, \a Val. +/// \param Val The input value. + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cos) +half cos(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cos) +half2 cos(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cos) +half3 cos(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cos) +half4 cos(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cos) +float cos(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cos) +float2 cos(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cos) +float3 cos(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cos) +float4 cos(float4); + +//===----------------------------------------------------------------------===// +// cosh builtins +//===----------------------------------------------------------------------===// + +/// \fn T cosh(T Val) +/// \brief Returns the hyperbolic cosine of the input value, \a Val. +/// \param Val The input value. 
+ +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh) +half cosh(half); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh) +half2 cosh(half2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh) +half3 cosh(half3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh) +half4 cosh(half4); +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh) +float cosh(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh) +float2 cosh(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh) +float3 cosh(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh) +float4 cosh(float4); + +//===----------------------------------------------------------------------===// +// count bits builtins +//===----------------------------------------------------------------------===// + +/// \fn T countbits(T Val) +/// \brief Return the number of bits (per component) set in the input integer. +/// \param Val The input value. + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +const inline uint countbits(int16_t x) { + return __builtin_elementwise_popcount(x); +} +_HLSL_AVAILABILITY(shadermodel, 6.2) +const inline uint2 countbits(int16_t2 x) { + return __builtin_elementwise_popcount(x); +} +_HLSL_AVAILABILITY(shadermodel, 6.2) +const inline uint3 countbits(int16_t3 x) { + return __builtin_elementwise_popcount(x); +} +_HLSL_AVAILABILITY(shadermodel, 6.2) +const inline uint4 countbits(int16_t4 x) { + return __builtin_elementwise_popcount(x); +} +_HLSL_AVAILABILITY(shadermodel, 6.2) +const inline uint countbits(uint16_t x) { + return __builtin_elementwise_popcount(x); +} +_HLSL_AVAILABILITY(shadermodel, 6.2) +const inline uint2 countbits(uint16_t2 x) { + return __builtin_elementwise_popcount(x); +} +_HLSL_AVAILABILITY(shadermodel, 6.2) +const inline uint3 countbits(uint16_t3 x) { + return __builtin_elementwise_popcount(x); +} +_HLSL_AVAILABILITY(shadermodel, 6.2) +const inline uint4 countbits(uint16_t4 x) { + return __builtin_elementwise_popcount(x); +} 
+#endif + +const inline uint countbits(int x) { return __builtin_elementwise_popcount(x); } +const inline uint2 countbits(int2 x) { + return __builtin_elementwise_popcount(x); +} +const inline uint3 countbits(int3 x) { + return __builtin_elementwise_popcount(x); +} +const inline uint4 countbits(int4 x) { + return __builtin_elementwise_popcount(x); +} + +const inline uint countbits(uint x) { + return __builtin_elementwise_popcount(x); +} +const inline uint2 countbits(uint2 x) { + return __builtin_elementwise_popcount(x); +} +const inline uint3 countbits(uint3 x) { + return __builtin_elementwise_popcount(x); +} +const inline uint4 countbits(uint4 x) { + return __builtin_elementwise_popcount(x); +} + +const inline uint countbits(int64_t x) { + return __builtin_elementwise_popcount(x); +} +const inline uint2 countbits(int64_t2 x) { + return __builtin_elementwise_popcount(x); +} +const inline uint3 countbits(int64_t3 x) { + return __builtin_elementwise_popcount(x); +} +const inline uint4 countbits(int64_t4 x) { + return __builtin_elementwise_popcount(x); +} + +const inline uint countbits(uint64_t x) { + return __builtin_elementwise_popcount(x); +} +const inline uint2 countbits(uint64_t2 x) { + return __builtin_elementwise_popcount(x); +} +const inline uint3 countbits(uint64_t3 x) { + return __builtin_elementwise_popcount(x); +} +const inline uint4 countbits(uint64_t4 x) { + return __builtin_elementwise_popcount(x); +} + +//===----------------------------------------------------------------------===// +// degrees builtins +//===----------------------------------------------------------------------===// + +/// \fn T degrees(T x) +/// \brief Converts the specified value from radians to degrees. +/// \param x The specified input value. 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_degrees) +half degrees(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_degrees) +half2 degrees(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_degrees) +half3 degrees(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_degrees) +half4 degrees(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_degrees) +float degrees(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_degrees) +float2 degrees(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_degrees) +float3 degrees(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_degrees) +float4 degrees(float4); + +//===----------------------------------------------------------------------===// +// dot product builtins +//===----------------------------------------------------------------------===// + +/// \fn K dot(T X, T Y) +/// \brief Return the dot product (a scalar value) of \a X and \a Y. +/// \param X The X input value. +/// \param Y The Y input value. 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +half dot(half, half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +half dot(half2, half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +half dot(half3, half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +half dot(half4, half4); + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int16_t dot(int16_t, int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int16_t dot(int16_t2, int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int16_t dot(int16_t3, int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int16_t dot(int16_t4, int16_t4); + +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint16_t dot(uint16_t, uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint16_t dot(uint16_t2, uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint16_t dot(uint16_t3, uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint16_t dot(uint16_t4, uint16_t4); +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +float dot(float, float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +float dot(float2, float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +float dot(float3, float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +float dot(float4, float4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +double dot(double, double); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int dot(int, int); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int dot(int2, int2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int dot(int3, int3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int dot(int4, int4); + 
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint dot(uint, uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint dot(uint2, uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint dot(uint3, uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint dot(uint4, uint4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int64_t dot(int64_t, int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int64_t dot(int64_t2, int64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int64_t dot(int64_t3, int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int64_t dot(int64_t4, int64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint64_t dot(uint64_t, uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint64_t dot(uint64_t2, uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint64_t dot(uint64_t3, uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint64_t dot(uint64_t4, uint64_t4); + +//===----------------------------------------------------------------------===// +// dot4add builtins +//===----------------------------------------------------------------------===// + +/// \fn int dot4add_i8packed(uint A, uint B, int C) + +_HLSL_AVAILABILITY(shadermodel, 6.4) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot4add_i8packed) +int dot4add_i8packed(uint, uint, int); + +/// \fn uint dot4add_u8packed(uint A, uint B, uint C) + +_HLSL_AVAILABILITY(shadermodel, 6.4) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot4add_u8packed) +uint dot4add_u8packed(uint, uint, uint); + +//===----------------------------------------------------------------------===// +// exp builtins +//===----------------------------------------------------------------------===// + +/// \fn T exp(T x) +/// \brief Returns the base-e exponential, or \a e**x, of the specified value. +/// \param x The specified input value. +/// +/// The return value is the base-e exponential of the \a x parameter. 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) +half exp(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) +half2 exp(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) +half3 exp(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) +half4 exp(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) +float exp(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) +float2 exp(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) +float3 exp(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) +float4 exp(float4); + +//===----------------------------------------------------------------------===// +// exp2 builtins +//===----------------------------------------------------------------------===// + +/// \fn T exp2(T x) +/// \brief Returns the base 2 exponential, or \a 2**x, of the specified value. +/// \param x The specified input value. +/// +/// The base 2 exponential of the \a x parameter. 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) +half exp2(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) +half2 exp2(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) +half3 exp2(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) +half4 exp2(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) +float exp2(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) +float2 exp2(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) +float3 exp2(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) +float4 exp2(float4); + +//===----------------------------------------------------------------------===// +// firstbithigh builtins +//===----------------------------------------------------------------------===// + +/// \fn T firstbithigh(T Val) +/// \brief Returns the location of the first set bit starting from the highest +/// order bit and working downward, per component. +/// \param Val the input value. 
+ +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint firstbithigh(int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint2 firstbithigh(int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint3 firstbithigh(int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint4 firstbithigh(int16_t4); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint firstbithigh(uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint2 firstbithigh(uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint3 firstbithigh(uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint4 firstbithigh(uint16_t4); +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint firstbithigh(int); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint2 firstbithigh(int2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint3 firstbithigh(int3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint4 firstbithigh(int4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint firstbithigh(uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint2 firstbithigh(uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint3 firstbithigh(uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint4 firstbithigh(uint4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint firstbithigh(int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint2 firstbithigh(int64_t2); 
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint3 firstbithigh(int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint4 firstbithigh(int64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint firstbithigh(uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint2 firstbithigh(uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint3 firstbithigh(uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint4 firstbithigh(uint64_t4); + +//===----------------------------------------------------------------------===// +// firstbitlow builtins +//===----------------------------------------------------------------------===// + +/// \fn T firstbitlow(T Val) +/// \brief Returns the location of the first set bit starting from the lowest +/// order bit and working upward, per component. +/// \param Val the input value. + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(int16_t4); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 
firstbitlow(uint16_t4); +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(int); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(int2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(int3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(int4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(uint4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(int64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(int64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(uint64_t4); + +//===----------------------------------------------------------------------===// +// floor builtins +//===----------------------------------------------------------------------===// + +/// \fn T floor(T Val) +/// \brief Returns the largest integer that is less than or equal to the input +/// value, \a Val. +/// \param Val The input value. 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) +half floor(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) +half2 floor(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) +half3 floor(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) +half4 floor(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) +float floor(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) +float2 floor(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) +float3 floor(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) +float4 floor(float4); + +//===----------------------------------------------------------------------===// +// fmod builtins +//===----------------------------------------------------------------------===// + +/// \fn T fmod(T x, T y) +/// \brief Returns the floating-point remainder of x divided by y. +/// \param x [in] The dividend. +/// \param y [in] The divisor. +/// +/// Return the floating-point remainder of the x parameter divided by the y +/// parameter. 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod) +half fmod(half, half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod) +half2 fmod(half2, half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod) +half3 fmod(half3, half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod) +half4 fmod(half4, half4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod) +float fmod(float, float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod) +float2 fmod(float2, float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod) +float3 fmod(float3, float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod) +float4 fmod(float4, float4); + +//===----------------------------------------------------------------------===// +// frac builtins +//===----------------------------------------------------------------------===// + +/// \fn T frac(T x) +/// \brief Returns the fractional (or decimal) part of the \a x parameter. +/// \param x The specified input value. +/// +/// The return value is greater than or equal to 0 and less than 1. 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) +half frac(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) +half2 frac(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) +half3 frac(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) +half4 frac(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) +float frac(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) +float2 frac(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) +float3 frac(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) +float4 frac(float4); + +//===----------------------------------------------------------------------===// +// isinf builtins +//===----------------------------------------------------------------------===// + +/// \fn T isinf(T x) +/// \brief Determines if the specified value \a x is infinite. +/// \param x The specified input value. +/// +/// Returns a value of the same size as the input, with a value set +/// to True if the x parameter is +INF or -INF. Otherwise, False. 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf) +bool isinf(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf) +bool2 isinf(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf) +bool3 isinf(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf) +bool4 isinf(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf) +bool isinf(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf) +bool2 isinf(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf) +bool3 isinf(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf) +bool4 isinf(float4); + +//===----------------------------------------------------------------------===// +// lerp builtins +//===----------------------------------------------------------------------===// + +/// \fn T lerp(T x, T y, T s) +/// \brief Returns the linear interpolation of x to y by s. +/// \param x [in] The first floating-point value. +/// \param y [in] The second floating-point value. +/// \param s [in] A value that linearly interpolates between the x parameter and +/// the y parameter. +/// +/// Linear interpolation is based on the following formula: x*(1-s) + y*s which +/// can equivalently be written as x + s(y-x). 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) +half lerp(half, half, half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) +half2 lerp(half2, half2, half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) +half3 lerp(half3, half3, half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) +half4 lerp(half4, half4, half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) +float lerp(float, float, float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) +float2 lerp(float2, float2, float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) +float3 lerp(float3, float3, float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) +float4 lerp(float4, float4, float4); + +//===----------------------------------------------------------------------===// +// log builtins +//===----------------------------------------------------------------------===// + +/// \fn T log(T Val) +/// \brief Returns the base-e logarithm of the input value, \a Val. +/// \param Val The input value. +/// +/// If \a Val is negative, this result is undefined. If \a Val is 0, this +/// function returns negative infinity. 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log) +half log(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log) +half2 log(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log) +half3 log(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log) +half4 log(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log) +float log(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log) +float2 log(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log) +float3 log(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log) +float4 log(float4); + +//===----------------------------------------------------------------------===// +// log10 builtins +//===----------------------------------------------------------------------===// + +/// \fn T log10(T Val) +/// \brief Returns the base-10 logarithm of the input value, \a Val. +/// \param Val The input value. +/// +/// If \a Val is negative, this result is undefined. If \a Val is 0, this +/// function returns negative infinity. 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log10) +half log10(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log10) +half2 log10(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log10) +half3 log10(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log10) +half4 log10(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log10) +float log10(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log10) +float2 log10(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log10) +float3 log10(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log10) +float4 log10(float4); + +//===----------------------------------------------------------------------===// +// log2 builtins +//===----------------------------------------------------------------------===// + +/// \fn T log2(T Val) +/// \brief Returns the base-2 logarithm of the input value, \a Val. +/// \param Val The input value. +/// +/// If \a Val is negative, this result is undefined. If \a Val is 0, this +/// function returns negative infinity. 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log2) +half log2(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log2) +half2 log2(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log2) +half3 log2(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log2) +half4 log2(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log2) +float log2(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log2) +float2 log2(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log2) +float3 log2(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log2) +float4 log2(float4); + +//===----------------------------------------------------------------------===// +// mad builtins +//===----------------------------------------------------------------------===// + +/// \fn T mad(T M, T A, T B) +/// \brief The result of \a M * \a A + \a B. +/// \param M The multiplication value. +/// \param A The first addition value. +/// \param B The second addition value. 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +half mad(half, half, half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +half2 mad(half2, half2, half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +half3 mad(half3, half3, half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +half4 mad(half4, half4, half4); + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +int16_t mad(int16_t, int16_t, int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +int16_t2 mad(int16_t2, int16_t2, int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +int16_t3 mad(int16_t3, int16_t3, int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +int16_t4 mad(int16_t4, int16_t4, int16_t4); + +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +uint16_t mad(uint16_t, uint16_t, uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +uint16_t2 mad(uint16_t2, uint16_t2, uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +uint16_t3 mad(uint16_t3, uint16_t3, uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +uint16_t4 mad(uint16_t4, uint16_t4, uint16_t4); +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +int mad(int, int, int); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +int2 mad(int2, int2, int2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +int3 mad(int3, int3, int3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +int4 mad(int4, int4, int4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +uint mad(uint, uint, uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +uint2 mad(uint2, uint2, uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +uint3 mad(uint3, 
uint3, uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +uint4 mad(uint4, uint4, uint4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +int64_t mad(int64_t, int64_t, int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +int64_t2 mad(int64_t2, int64_t2, int64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +int64_t3 mad(int64_t3, int64_t3, int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +int64_t4 mad(int64_t4, int64_t4, int64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +uint64_t mad(uint64_t, uint64_t, uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +uint64_t2 mad(uint64_t2, uint64_t2, uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +uint64_t3 mad(uint64_t3, uint64_t3, uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +uint64_t4 mad(uint64_t4, uint64_t4, uint64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +float mad(float, float, float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +float2 mad(float2, float2, float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +float3 mad(float3, float3, float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +float4 mad(float4, float4, float4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +double mad(double, double, double); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +double2 mad(double2, double2, double2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +double3 mad(double3, double3, double3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) +double4 mad(double4, double4, double4); + +//===----------------------------------------------------------------------===// +// max builtins +//===----------------------------------------------------------------------===// + +/// \fn T max(T X, T Y) +/// \brief Return the greater of \a X and \a Y. +/// \param X The X input value. +/// \param Y The Y input value. 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +half max(half, half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +half2 max(half2, half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +half3 max(half3, half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +half4 max(half4, half4); +GEN_VEC_SCALAR_OVERLOADS(max, half, 1) + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +int16_t max(int16_t, int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +int16_t2 max(int16_t2, int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +int16_t3 max(int16_t3, int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +int16_t4 max(int16_t4, int16_t4); +GEN_VEC_SCALAR_OVERLOADS(max, int16_t, 1) + +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +uint16_t max(uint16_t, uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +uint16_t2 max(uint16_t2, uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +uint16_t3 max(uint16_t3, uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +uint16_t4 max(uint16_t4, uint16_t4); +GEN_VEC_SCALAR_OVERLOADS(max, uint16_t, 1) +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +int max(int, int); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +int2 max(int2, int2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +int3 max(int3, int3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +int4 max(int4, int4); +GEN_VEC_SCALAR_OVERLOADS(max, int, 0) + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +uint 
max(uint, uint); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +uint2 max(uint2, uint2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +uint3 max(uint3, uint3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +uint4 max(uint4, uint4); +GEN_VEC_SCALAR_OVERLOADS(max, uint, 0) + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +int64_t max(int64_t, int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +int64_t2 max(int64_t2, int64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +int64_t3 max(int64_t3, int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +int64_t4 max(int64_t4, int64_t4); +GEN_VEC_SCALAR_OVERLOADS(max, int64_t, 0) + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +uint64_t max(uint64_t, uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +uint64_t2 max(uint64_t2, uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +uint64_t3 max(uint64_t3, uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +uint64_t4 max(uint64_t4, uint64_t4); +GEN_VEC_SCALAR_OVERLOADS(max, uint64_t, 0) + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +float max(float, float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +float2 max(float2, float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +float3 max(float3, float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +float4 max(float4, float4); +GEN_VEC_SCALAR_OVERLOADS(max, float, 0) + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +double max(double, double); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +double2 max(double2, double2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +double3 max(double3, double3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) +double4 max(double4, double4); +GEN_VEC_SCALAR_OVERLOADS(max, double, 0) + +//===----------------------------------------------------------------------===// +// min builtins +//===----------------------------------------------------------------------===// + +/// \fn T min(T X, T Y) +/// \brief Return the lesser of \a X 
and \a Y. +/// \param X The X input value. +/// \param Y The Y input value. + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +half min(half, half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +half2 min(half2, half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +half3 min(half3, half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +half4 min(half4, half4); +GEN_VEC_SCALAR_OVERLOADS(min, half, 1) + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +int16_t min(int16_t, int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +int16_t2 min(int16_t2, int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +int16_t3 min(int16_t3, int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +int16_t4 min(int16_t4, int16_t4); +GEN_VEC_SCALAR_OVERLOADS(min, int16_t, 1) + +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +uint16_t min(uint16_t, uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +uint16_t2 min(uint16_t2, uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +uint16_t3 min(uint16_t3, uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +uint16_t4 min(uint16_t4, uint16_t4); +GEN_VEC_SCALAR_OVERLOADS(min, uint16_t, 1) +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +int min(int, int); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +int2 min(int2, int2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +int3 min(int3, int3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +int4 min(int4, int4); 
+GEN_VEC_SCALAR_OVERLOADS(min, int, 0) + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +uint min(uint, uint); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +uint2 min(uint2, uint2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +uint3 min(uint3, uint3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +uint4 min(uint4, uint4); +GEN_VEC_SCALAR_OVERLOADS(min, uint, 0) + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +float min(float, float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +float2 min(float2, float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +float3 min(float3, float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +float4 min(float4, float4); +GEN_VEC_SCALAR_OVERLOADS(min, float, 0) + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +int64_t min(int64_t, int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +int64_t2 min(int64_t2, int64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +int64_t3 min(int64_t3, int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +int64_t4 min(int64_t4, int64_t4); +GEN_VEC_SCALAR_OVERLOADS(min, int64_t, 0) + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +uint64_t min(uint64_t, uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +uint64_t2 min(uint64_t2, uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +uint64_t3 min(uint64_t3, uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +uint64_t4 min(uint64_t4, uint64_t4); +GEN_VEC_SCALAR_OVERLOADS(min, uint64_t, 0) + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +double min(double, double); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +double2 min(double2, double2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +double3 min(double3, double3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) +double4 min(double4, double4); +GEN_VEC_SCALAR_OVERLOADS(min, double, 0) + +//===----------------------------------------------------------------------===// +// normalize builtins 
+//===----------------------------------------------------------------------===// + +/// \fn T normalize(T x) +/// \brief Returns the normalized unit vector of the specified floating-point +/// vector. \param x [in] The vector of floats. +/// +/// Normalize is based on the following formula: x / length(x). + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_normalize) +half normalize(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_normalize) +half2 normalize(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_normalize) +half3 normalize(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_normalize) +half4 normalize(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_normalize) +float normalize(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_normalize) +float2 normalize(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_normalize) +float3 normalize(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_normalize) +float4 normalize(float4); + +//===----------------------------------------------------------------------===// +// or builtins +//===----------------------------------------------------------------------===// + +/// \fn bool or(bool x, bool y) +/// \brief Logically ors two boolean vectors elementwise and produces a bool +/// vector output. 
+ +// TODO: Clean up clang-format marker once we've resolved +// https://github.com/llvm/llvm-project/issues/127851 +// +// clang-format off +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_or) +bool or(bool, bool); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_or) +bool2 or(bool2, bool2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_or) +bool3 or(bool3, bool3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_or) +bool4 or(bool4, bool4); +// clang-format on + +//===----------------------------------------------------------------------===// +// pow builtins +//===----------------------------------------------------------------------===// + +/// \fn T pow(T Val, T Pow) +/// \brief Return the value \a Val, raised to the power \a Pow. +/// \param Val The input value. +/// \param Pow The specified power. + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_pow) +half pow(half, half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_pow) +half2 pow(half2, half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_pow) +half3 pow(half3, half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_pow) +half4 pow(half4, half4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_pow) +float pow(float, float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_pow) +float2 pow(float2, float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_pow) +float3 pow(float3, float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_pow) +float4 pow(float4, float4); + +//===----------------------------------------------------------------------===// +// reversebits builtins +//===----------------------------------------------------------------------===// + +/// \fn T reversebits(T Val) +/// \brief Return the value \a Val with the bit order reversed. +/// \param Val The input value. 
+ +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) +uint16_t reversebits(uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) +uint16_t2 reversebits(uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) +uint16_t3 reversebits(uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) +uint16_t4 reversebits(uint16_t4); +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) +uint reversebits(uint); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) +uint2 reversebits(uint2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) +uint3 reversebits(uint3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) +uint4 reversebits(uint4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) +uint64_t reversebits(uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) +uint64_t2 reversebits(uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) +uint64_t3 reversebits(uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) +uint64_t4 reversebits(uint64_t4); + +//===----------------------------------------------------------------------===// +// cross builtins +//===----------------------------------------------------------------------===// + +/// \fn T cross(T x, T y) +/// \brief Returns the cross product of two floating-point, 3D vectors. +/// \param x [in] The first floating-point, 3D vector. +/// \param y [in] The second floating-point, 3D vector. 
+/// +/// Result is the cross product of x and y, i.e., the resulting +/// components are, in order : +/// x[1] * y[2] - y[1] * x[2] +/// x[2] * y[0] - y[2] * x[0] +/// x[0] * y[1] - y[0] * x[1] + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_cross) +half3 cross(half3, half3); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_cross) +float3 cross(float3, float3); + +//===----------------------------------------------------------------------===// +// rcp builtins +//===----------------------------------------------------------------------===// + +/// \fn T rcp(T x) +/// \brief Calculates a fast, approximate, per-component reciprocal ie 1 / \a x. +/// \param x The specified input value. +/// +/// The return value is the reciprocal of the \a x parameter. + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) +half rcp(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) +half2 rcp(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) +half3 rcp(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) +half4 rcp(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) +float rcp(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) +float2 rcp(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) +float3 rcp(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) +float4 rcp(float4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) +double rcp(double); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) +double2 rcp(double2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) +double3 rcp(double3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) +double4 rcp(double4); + +//===----------------------------------------------------------------------===// +// rsqrt builtins 
+//===----------------------------------------------------------------------===// + +/// \fn T rsqrt(T x) +/// \brief Returns the reciprocal of the square root of the specified value. +/// ie 1 / sqrt( \a x). +/// \param x The specified input value. +/// +/// This function uses the following formula: 1 / sqrt(x). + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt) +half rsqrt(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt) +half2 rsqrt(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt) +half3 rsqrt(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt) +half4 rsqrt(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt) +float rsqrt(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt) +float2 rsqrt(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt) +float3 rsqrt(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt) +float4 rsqrt(float4); + +//===----------------------------------------------------------------------===// +// round builtins +//===----------------------------------------------------------------------===// + +/// \fn T round(T x) +/// \brief Rounds the specified value \a x to the nearest integer. +/// \param x The specified input value. +/// +/// The return value is the \a x parameter, rounded to the nearest integer +/// within a floating-point type. Halfway cases are +/// rounded to the nearest even value. 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_roundeven) +half round(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_roundeven) +half2 round(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_roundeven) +half3 round(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_roundeven) +half4 round(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_roundeven) +float round(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_roundeven) +float2 round(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_roundeven) +float3 round(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_roundeven) +float4 round(float4); + +//===----------------------------------------------------------------------===// +// saturate builtins +//===----------------------------------------------------------------------===// + +/// \fn T saturate(T Val) +/// \brief Returns input value, \a Val, clamped within the range of 0.0f +/// to 1.0f. \param Val The input value. 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +half saturate(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +half2 saturate(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +half3 saturate(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +half4 saturate(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +float saturate(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +float2 saturate(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +float3 saturate(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +float4 saturate(float4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +double saturate(double); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +double2 saturate(double2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +double3 saturate(double3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +double4 saturate(double4); + +//===----------------------------------------------------------------------===// +// select builtins +//===----------------------------------------------------------------------===// + +/// \fn T select(bool Cond, T TrueVal, T FalseVal) +/// \brief Ternary operator: yields \a TrueVal if \a Cond is true, otherwise +/// \a FalseVal. +/// \param Cond The Condition input value. +/// \param TrueVal The Value returned if Cond is true. +/// \param FalseVal The Value returned if Cond is false. + +template <typename T> +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_select) +T select(bool, T, T); + +/// \fn vector<T,Sz> select(vector<bool,Sz> Conds, vector<T,Sz> TrueVals, +/// vector<T,Sz> FalseVals) +/// \brief Ternary operator for vectors. All vectors must be the same size. +/// \param Conds The Condition input values. +/// \param TrueVals The vector values are chosen from when conditions are true.
+/// \param FalseVals The vector values are chosen from when conditions are +/// false. + +template <typename T, int Sz> +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_select) +vector<T, Sz> select(vector<bool, Sz>, vector<T, Sz>, vector<T, Sz>); + +//===----------------------------------------------------------------------===// +// sin builtins +//===----------------------------------------------------------------------===// + +/// \fn T sin(T Val) +/// \brief Returns the sine of the input value, \a Val. +/// \param Val The input value. + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sin) +half sin(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sin) +half2 sin(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sin) +half3 sin(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sin) +half4 sin(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sin) +float sin(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sin) +float2 sin(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sin) +float3 sin(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sin) +float4 sin(float4); + +//===----------------------------------------------------------------------===// +// sinh builtins +//===----------------------------------------------------------------------===// + +/// \fn T sinh(T Val) +/// \brief Returns the hyperbolic sine of the input value, \a Val. +/// \param Val The input value.
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sinh) +half sinh(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sinh) +half2 sinh(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sinh) +half3 sinh(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sinh) +half4 sinh(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sinh) +float sinh(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sinh) +float2 sinh(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sinh) +float3 sinh(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sinh) +float4 sinh(float4); + +//===----------------------------------------------------------------------===// +// sqrt builtins +//===----------------------------------------------------------------------===// + +/// \fn T sqrt(T Val) +/// \brief Returns the square root of the input value, \a Val. +/// \param Val The input value. + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt) +half sqrt(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt) +half2 sqrt(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt) +half3 sqrt(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt) +half4 sqrt(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt) +float sqrt(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt) +float2 sqrt(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt) +float3 sqrt(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt) +float4 sqrt(float4); + +//===----------------------------------------------------------------------===// +// step builtins +//===----------------------------------------------------------------------===// + +/// \fn T step(T y, T x) +/// \brief Returns 1 if the x parameter is greater than or equal to the y +/// parameter; otherwise, 0.
+/// \param y [in] The first floating-point value to compare. +/// \param x [in] The second floating-point value to compare. +/// +/// Step is based on the following formula: (x >= y) ? 1 : 0 + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) +half step(half, half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) +half2 step(half2, half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) +half3 step(half3, half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) +half4 step(half4, half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) +float step(float, float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) +float2 step(float2, float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) +float3 step(float3, float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) +float4 step(float4, float4); + +//===----------------------------------------------------------------------===// +// tan builtins +//===----------------------------------------------------------------------===// + +/// \fn T tan(T Val) +/// \brief Returns the tangent of the input value, \a Val. +/// \param Val The input value.
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan) +half tan(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan) +half2 tan(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan) +half3 tan(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan) +half4 tan(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan) +float tan(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan) +float2 tan(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan) +float3 tan(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan) +float4 tan(float4); + +//===----------------------------------------------------------------------===// +// tanh builtins +//===----------------------------------------------------------------------===// + +/// \fn T tanh(T Val) +/// \brief Returns the hyperbolic tangent of the input value, \a Val. +/// \param Val The input value. + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tanh) +half tanh(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tanh) +half2 tanh(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tanh) +half3 tanh(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tanh) +half4 tanh(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tanh) +float tanh(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tanh) +float2 tanh(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tanh) +float3 tanh(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tanh) +float4 tanh(float4); + +//===----------------------------------------------------------------------===// +// trunc builtins +//===----------------------------------------------------------------------===// + +/// \fn T trunc(T Val) +/// \brief Returns the truncated integer value of the input value, \a Val. +/// \param Val The input value.
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_trunc) +half trunc(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_trunc) +half2 trunc(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_trunc) +half3 trunc(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_trunc) +half4 trunc(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_trunc) +float trunc(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_trunc) +float2 trunc(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_trunc) +float3 trunc(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_trunc) +float4 trunc(float4); + +//===----------------------------------------------------------------------===// +// Wave* builtins +//===----------------------------------------------------------------------===// + +/// \brief Returns true if the expression is true in all active lanes in the +/// current wave. +/// +/// \param Val The boolean expression to evaluate. +/// \return True if the expression is true in all lanes. +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_all_true) +__attribute__((convergent)) bool WaveActiveAllTrue(bool Val); + +/// \brief Returns true if the expression is true in any active lane in the +/// current wave. +/// +/// \param Val The boolean expression to evaluate. +/// \return True if the expression is true in any lane. +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_any_true) +__attribute__((convergent)) bool WaveActiveAnyTrue(bool Val); + +/// \brief Counts the number of boolean variables which evaluate to true across +/// all active lanes in the current wave. +/// +/// \param Val The input boolean value. +/// \return The number of lanes for which the boolean variable evaluates to +/// true, across all active lanes in the current wave. 
+_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_count_bits) +__attribute__((convergent)) uint WaveActiveCountBits(bool Val); + +/// \brief Returns the index of the current lane within the current wave. +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_get_lane_index) +__attribute__((convergent)) uint WaveGetLaneIndex(); + +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_is_first_lane) +__attribute__((convergent)) bool WaveIsFirstLane(); + +//===----------------------------------------------------------------------===// +// WaveReadLaneAt builtins +//===----------------------------------------------------------------------===// + +/// \brief Returns the value of the expression for the given lane index within +/// the specified wave. + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) bool WaveReadLaneAt(bool, int32_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) bool2 WaveReadLaneAt(bool2, int32_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) bool3 WaveReadLaneAt(bool3, int32_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) bool4 WaveReadLaneAt(bool4, int32_t); + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) int16_t WaveReadLaneAt(int16_t, int32_t); +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) int16_t2 WaveReadLaneAt(int16_t2, int32_t); +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) int16_t3 WaveReadLaneAt(int16_t3, int32_t); +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) int16_t4
WaveReadLaneAt(int16_t4, int32_t); +#endif + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) half WaveReadLaneAt(half, int32_t); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) half2 WaveReadLaneAt(half2, int32_t); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) half3 WaveReadLaneAt(half3, int32_t); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) half4 WaveReadLaneAt(half4, int32_t); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) int WaveReadLaneAt(int, int32_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) int2 WaveReadLaneAt(int2, int32_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) int3 WaveReadLaneAt(int3, int32_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) int4 WaveReadLaneAt(int4, int32_t); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) float WaveReadLaneAt(float, int32_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) float2 WaveReadLaneAt(float2, int32_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) float3 WaveReadLaneAt(float3, int32_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) float4 WaveReadLaneAt(float4, int32_t); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) int64_t WaveReadLaneAt(int64_t, int32_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) int64_t2 WaveReadLaneAt(int64_t2, int32_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) 
+__attribute__((convergent)) int64_t3 WaveReadLaneAt(int64_t3, int32_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) int64_t4 WaveReadLaneAt(int64_t4, int32_t); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) double WaveReadLaneAt(double, int32_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) double2 WaveReadLaneAt(double2, int32_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) double3 WaveReadLaneAt(double3, int32_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) +__attribute__((convergent)) double4 WaveReadLaneAt(double4, int32_t); + +//===----------------------------------------------------------------------===// +// WaveActiveMax builtins +//===----------------------------------------------------------------------===// + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) half WaveActiveMax(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) half2 WaveActiveMax(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) half3 WaveActiveMax(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) half4 WaveActiveMax(half4); + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) int16_t WaveActiveMax(int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) int16_t2 WaveActiveMax(int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) int16_t3 
WaveActiveMax(int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) int16_t4 WaveActiveMax(int16_t4); + +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) uint16_t WaveActiveMax(uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) uint16_t2 WaveActiveMax(uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) uint16_t3 WaveActiveMax(uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) uint16_t4 WaveActiveMax(uint16_t4); +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) int WaveActiveMax(int); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) int2 WaveActiveMax(int2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) int3 WaveActiveMax(int3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) int4 WaveActiveMax(int4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) uint WaveActiveMax(uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) uint2 WaveActiveMax(uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) uint3 WaveActiveMax(uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) uint4 WaveActiveMax(uint4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) int64_t WaveActiveMax(int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) int64_t2 WaveActiveMax(int64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) 
int64_t3 WaveActiveMax(int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) int64_t4 WaveActiveMax(int64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) uint64_t WaveActiveMax(uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) uint64_t2 WaveActiveMax(uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) uint64_t3 WaveActiveMax(uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) uint64_t4 WaveActiveMax(uint64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) float WaveActiveMax(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) float2 WaveActiveMax(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) float3 WaveActiveMax(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) float4 WaveActiveMax(float4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) double WaveActiveMax(double); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) double2 WaveActiveMax(double2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) double3 WaveActiveMax(double3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) +__attribute__((convergent)) double4 WaveActiveMax(double4); + +//===----------------------------------------------------------------------===// +// WaveActiveSum builtins +//===----------------------------------------------------------------------===// + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) half WaveActiveSum(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) 
half2 WaveActiveSum(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) half3 WaveActiveSum(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) half4 WaveActiveSum(half4); + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) int16_t WaveActiveSum(int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) int16_t2 WaveActiveSum(int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) int16_t3 WaveActiveSum(int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) int16_t4 WaveActiveSum(int16_t4); + +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) uint16_t WaveActiveSum(uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) uint16_t2 WaveActiveSum(uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) uint16_t3 WaveActiveSum(uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) uint16_t4 WaveActiveSum(uint16_t4); +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) int WaveActiveSum(int); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) int2 WaveActiveSum(int2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) int3 WaveActiveSum(int3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) 
+__attribute__((convergent)) int4 WaveActiveSum(int4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) uint WaveActiveSum(uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) uint2 WaveActiveSum(uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) uint3 WaveActiveSum(uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) uint4 WaveActiveSum(uint4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) int64_t WaveActiveSum(int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) int64_t2 WaveActiveSum(int64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) int64_t3 WaveActiveSum(int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) int64_t4 WaveActiveSum(int64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) uint64_t WaveActiveSum(uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) uint64_t2 WaveActiveSum(uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) uint64_t3 WaveActiveSum(uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) uint64_t4 WaveActiveSum(uint64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) float WaveActiveSum(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) float2 WaveActiveSum(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) float3 WaveActiveSum(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) float4 WaveActiveSum(float4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) double WaveActiveSum(double); 
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) double2 WaveActiveSum(double2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) double3 WaveActiveSum(double3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) +__attribute__((convergent)) double4 WaveActiveSum(double4); + +//===----------------------------------------------------------------------===// +// sign builtins +//===----------------------------------------------------------------------===// + +/// \fn T sign(T Val) +/// \brief Returns -1 if \a Val is less than zero; 0 if \a Val equals zero; and +/// 1 if \a Val is greater than zero. \param Val The input value. + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int sign(int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int2 sign(int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int3 sign(int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int4 sign(int16_t4); + +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int sign(uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int2 sign(uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int3 sign(uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int4 sign(uint16_t4); +#endif + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int sign(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int2 sign(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int3 
sign(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int4 sign(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int sign(int); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int2 sign(int2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int3 sign(int3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int4 sign(int4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int sign(uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int2 sign(uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int3 sign(uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int4 sign(uint4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int sign(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int2 sign(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int3 sign(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int4 sign(float4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int sign(int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int2 sign(int64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int3 sign(int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int4 sign(int64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int sign(uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int2 sign(uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int3 sign(uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int4 sign(uint64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int sign(double); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int2 sign(double2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int3 sign(double3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) +int4 sign(double4); + 
+//===----------------------------------------------------------------------===// +// radians builtins +//===----------------------------------------------------------------------===// + +/// \fn T radians(T Val) +/// \brief Converts the specified value from degrees to radians. + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians) +half radians(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians) +half2 radians(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians) +half3 radians(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians) +half4 radians(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians) +float radians(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians) +float2 radians(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians) +float3 radians(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians) +float4 radians(float4); + +//===----------------------------------------------------------------------===// +// GroupMemoryBarrierWithGroupSync builtins +//===----------------------------------------------------------------------===// + +/// \fn void GroupMemoryBarrierWithGroupSync(void) +/// \brief Blocks execution of all threads in a group until all group shared +/// accesses have been completed and all threads in the group have reached this +/// call. 
+ +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_group_memory_barrier_with_group_sync) +void GroupMemoryBarrierWithGroupSync(void); + +} // namespace hlsl +#endif //_HLSL_HLSL_ALIAS_INTRINSICS_H_ diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h index 0d568539cd66a..39254a3cc3a0a 100644 --- a/clang/lib/Headers/hlsl/hlsl_detail.h +++ b/clang/lib/Headers/hlsl/hlsl_detail.h @@ -9,6 +9,8 @@ #ifndef _HLSL_HLSL_DETAILS_H_ #define _HLSL_HLSL_DETAILS_H_ +#include "hlsl_alias_intrinsics.h" + namespace hlsl { namespace __detail { @@ -55,7 +57,7 @@ constexpr vector d3d_color_to_ubyte4_impl(vector V) { template constexpr enable_if_t::value || is_same::value, T> length_impl(T X) { - return __builtin_elementwise_abs(X); + return abs(X); } template @@ -64,7 +66,7 @@ length_vec_impl(vector X) { #if (__has_builtin(__builtin_spirv_length)) return __builtin_spirv_length(X); #else - return __builtin_elementwise_sqrt(__builtin_hlsl_dot(X, X)); + return sqrt(dot(X, X)); #endif } @@ -91,7 +93,7 @@ constexpr vector reflect_vec_impl(vector I, vector N) { #if (__has_builtin(__builtin_spirv_reflect)) return __builtin_spirv_reflect(I, N); #else - return I - 2 * N * __builtin_hlsl_dot(I, N); + return I - 2 * N * dot(I, N); #endif } diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 239d7a3f59b77..fe9441080433d 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -13,413 +13,6 @@ namespace hlsl { -// Note: Functions in this file are sorted alphabetically, then grouped by base -// element type, and the element types are sorted by size, then singed integer, -// unsigned integer and floating point. Keeping this ordering consistent will -// help keep this file manageable as it grows. 
- -#define _HLSL_BUILTIN_ALIAS(builtin) \ - __attribute__((clang_builtin_alias(builtin))) -#define _HLSL_AVAILABILITY(platform, version) \ - __attribute__((availability(platform, introduced = version))) -#define _HLSL_AVAILABILITY_STAGE(platform, version, stage) \ - __attribute__(( \ - availability(platform, introduced = version, environment = stage))) - -#ifdef __HLSL_ENABLE_16_BIT -#define _HLSL_16BIT_AVAILABILITY(platform, version) \ - __attribute__((availability(platform, introduced = version))) -#define _HLSL_16BIT_AVAILABILITY_STAGE(platform, version, stage) \ - __attribute__(( \ - availability(platform, introduced = version, environment = stage))) -#else -#define _HLSL_16BIT_AVAILABILITY(environment, version) -#define _HLSL_16BIT_AVAILABILITY_STAGE(environment, version, stage) -#endif - -//===----------------------------------------------------------------------===// -// abs builtins -//===----------------------------------------------------------------------===// - -/// \fn T abs(T Val) -/// \brief Returns the absolute value of the input value, \a Val. -/// \param Val The input value. 
- -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -int16_t abs(int16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -int16_t2 abs(int16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -int16_t3 abs(int16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -int16_t4 abs(int16_t4); - -_HLSL_AVAILABILITY(shadermodel, 6.2) -constexpr uint16_t abs(uint16_t V) { return V; } -_HLSL_AVAILABILITY(shadermodel, 6.2) -constexpr uint16_t2 abs(uint16_t2 V) { return V; } -_HLSL_AVAILABILITY(shadermodel, 6.2) -constexpr uint16_t3 abs(uint16_t3 V) { return V; } -_HLSL_AVAILABILITY(shadermodel, 6.2) -constexpr uint16_t4 abs(uint16_t4 V) { return V; } -#endif - -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -half abs(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -half2 abs(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -half3 abs(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -half4 abs(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -int abs(int); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -int2 abs(int2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -int3 abs(int3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -int4 abs(int4); - -constexpr uint abs(uint V) { return V; } -constexpr uint2 abs(uint2 V) { return V; } -constexpr uint3 abs(uint3 V) { return V; } -constexpr uint4 abs(uint4 V) { return V; } - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -float abs(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -float2 abs(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -float3 abs(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -float4 abs(float4); - 
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -int64_t abs(int64_t); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -int64_t2 abs(int64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -int64_t3 abs(int64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -int64_t4 abs(int64_t4); - -constexpr uint64_t abs(uint64_t V) { return V; } -constexpr uint64_t2 abs(uint64_t2 V) { return V; } -constexpr uint64_t3 abs(uint64_t3 V) { return V; } -constexpr uint64_t4 abs(uint64_t4 V) { return V; } - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -double abs(double); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -double2 abs(double2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -double3 abs(double3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) -double4 abs(double4); - -//===----------------------------------------------------------------------===// -// acos builtins -//===----------------------------------------------------------------------===// - -/// \fn T acos(T Val) -/// \brief Returns the arccosine of the input value, \a Val. -/// \param Val The input value. - -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos) -half acos(half); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos) -half2 acos(half2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos) -half3 acos(half3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos) -half4 acos(half4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos) -float acos(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos) -float2 acos(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos) -float3 acos(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos) -float4 acos(float4); - -//===----------------------------------------------------------------------===// -// all builtins -//===----------------------------------------------------------------------===// - -/// \fn bool all(T x) -/// \brief Returns True if all components of the \a x parameter are non-zero; -/// otherwise, false. 
\param x The input value. - -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(int16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(int16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(int16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(int16_t4); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(uint16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(uint16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(uint16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(uint16_t4); -#endif - -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(bool); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(bool2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(bool3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(bool4); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(int); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(int2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(int3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(int4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(uint); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(uint2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(uint3); 
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(uint4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(float); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(float2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(float3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(float4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(int64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(int64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(int64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(int64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(uint64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(uint64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(uint64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(uint64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(double); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(double2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(double3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) -bool all(double4); - -//===----------------------------------------------------------------------===// -// and builtins -//===----------------------------------------------------------------------===// - -/// \fn bool and(bool x, bool y) -/// \brief Logically ands two boolean vectors elementwise and produces a bool -/// vector output. 
- -// TODO: Clean up clang-format marker once we've resolved -// https://github.com/llvm/llvm-project/issues/127851 -// -// clang-format off -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_and) -bool and(bool x, bool y); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_and) -bool2 and(bool2 x, bool2 y); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_and) -bool3 and(bool3 x, bool3 y); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_and) -bool4 and(bool4 x, bool4 y); -// clang-format on - -//===----------------------------------------------------------------------===// -// any builtins -//===----------------------------------------------------------------------===// - -/// \fn bool any(T x) -/// \brief Returns True if any components of the \a x parameter are non-zero; -/// otherwise, false. \param x The input value. - -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(int16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(int16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(int16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(int16_t4); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(uint16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(uint16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(uint16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(uint16_t4); -#endif - -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) 
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(bool); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(bool2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(bool3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(bool4); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(int); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(int2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(int3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(int4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(uint); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(uint2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(uint3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(uint4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(float); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(float2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(float3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(float4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(int64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(int64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(int64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(int64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(uint64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(uint64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(uint64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(uint64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(double); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(double2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(double3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) -bool any(double4); - -//===----------------------------------------------------------------------===// -// asdouble builtins -//===----------------------------------------------------------------------===// - -/// \fn 
double asdouble(uint LowBits, uint HighBits) -/// \brief Reinterprets a cast value (two 32-bit values) into a double. -/// \param LowBits The low 32-bit pattern of the input value. -/// \param HighBits The high 32-bit pattern of the input value. - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_asdouble) -double asdouble(uint, uint); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_asdouble) -double2 asdouble(uint2, uint2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_asdouble) -double3 asdouble(uint3, uint3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_asdouble) -double4 asdouble(uint4, uint4); - //===----------------------------------------------------------------------===// // asfloat builtins //===----------------------------------------------------------------------===// @@ -453,34 +46,6 @@ template constexpr int asint(T F) { return __detail::bit_cast(F); } -//===----------------------------------------------------------------------===// -// asin builtins -//===----------------------------------------------------------------------===// - -/// \fn T asin(T Val) -/// \brief Returns the arcsine of the input value, \a Val. -/// \param Val The input value. 
- -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_asin) -half asin(half); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_asin) -half2 asin(half2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_asin) -half3 asin(half3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_asin) -half4 asin(half4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_asin) -float asin(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_asin) -float2 asin(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_asin) -float3 asin(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_asin) -float4 asin(float4); - //===----------------------------------------------------------------------===// // asuint builtins //===----------------------------------------------------------------------===// @@ -515,403 +80,6 @@ void asuint(double3, out uint3, out uint3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_splitdouble) void asuint(double4, out uint4, out uint4); -//===----------------------------------------------------------------------===// -// atan builtins -//===----------------------------------------------------------------------===// - -/// \fn T atan(T Val) -/// \brief Returns the arctangent of the input value, \a Val. -/// \param Val The input value. 
- -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan) -half atan(half); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan) -half2 atan(half2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan) -half3 atan(half3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan) -half4 atan(half4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan) -float atan(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan) -float2 atan(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan) -float3 atan(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan) -float4 atan(float4); - -//===----------------------------------------------------------------------===// -// atan2 builtins -//===----------------------------------------------------------------------===// - -/// \fn T atan2(T y, T x) -/// \brief Returns the arctangent of y/x, using the signs of the arguments to -/// determine the correct quadrant. -/// \param y The y-coordinate. -/// \param x The x-coordinate. - -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan2) -half atan2(half y, half x); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan2) -half2 atan2(half2 y, half2 x); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan2) -half3 atan2(half3 y, half3 x); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan2) -half4 atan2(half4 y, half4 x); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan2) -float atan2(float y, float x); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan2) -float2 atan2(float2 y, float2 x); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan2) -float3 atan2(float3 y, float3 x); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_atan2) -float4 atan2(float4 y, float4 x); - -//===----------------------------------------------------------------------===// -// ceil builtins -//===----------------------------------------------------------------------===// - -/// \fn T ceil(T Val) -/// \brief Returns the smallest integer value that is greater than or equal to -/// the input 
value, \a Val. -/// \param Val The input value. - -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil) -half ceil(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil) -half2 ceil(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil) -half3 ceil(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil) -half4 ceil(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil) -float ceil(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil) -float2 ceil(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil) -float3 ceil(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil) -float4 ceil(float4); - -//===----------------------------------------------------------------------===// -// clamp builtins -//===----------------------------------------------------------------------===// - -/// \fn T clamp(T X, T Min, T Max) -/// \brief Clamps the specified value \a X to the specified -/// minimum ( \a Min) and maximum ( \a Max) range. -/// \param X A value to clamp. -/// \param Min The specified minimum range. -/// \param Max The specified maximum range. -/// -/// Returns The clamped value for the \a X parameter. -/// For values of -INF or INF, clamp will behave as expected. -/// However for values of NaN, the results are undefined. 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -half clamp(half, half, half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -half2 clamp(half2, half2, half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -half3 clamp(half3, half3, half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -half4 clamp(half4, half4, half4); - -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -int16_t clamp(int16_t, int16_t, int16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -int16_t2 clamp(int16_t2, int16_t2, int16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -int16_t3 clamp(int16_t3, int16_t3, int16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -int16_t4 clamp(int16_t4, int16_t4, int16_t4); - -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -uint16_t clamp(uint16_t, uint16_t, uint16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -uint16_t2 clamp(uint16_t2, uint16_t2, uint16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -uint16_t3 clamp(uint16_t3, uint16_t3, uint16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -uint16_t4 clamp(uint16_t4, uint16_t4, uint16_t4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -int clamp(int, int, int); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -int2 clamp(int2, int2, int2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -int3 clamp(int3, int3, int3); 
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -int4 clamp(int4, int4, int4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -uint clamp(uint, uint, uint); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -uint2 clamp(uint2, uint2, uint2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -uint3 clamp(uint3, uint3, uint3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -uint4 clamp(uint4, uint4, uint4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -int64_t clamp(int64_t, int64_t, int64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -int64_t2 clamp(int64_t2, int64_t2, int64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -int64_t3 clamp(int64_t3, int64_t3, int64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -int64_t4 clamp(int64_t4, int64_t4, int64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -uint64_t clamp(uint64_t, uint64_t, uint64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -uint64_t2 clamp(uint64_t2, uint64_t2, uint64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -uint64_t3 clamp(uint64_t3, uint64_t3, uint64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -uint64_t4 clamp(uint64_t4, uint64_t4, uint64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -float clamp(float, float, float); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -float2 clamp(float2, float2, float2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -float3 clamp(float3, float3, float3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -float4 clamp(float4, float4, float4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -double clamp(double, double, double); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -double2 clamp(double2, double2, double2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) -double3 clamp(double3, double3, double3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) 
-double4 clamp(double4, double4, double4); - -//===----------------------------------------------------------------------===// -// clip builtins -//===----------------------------------------------------------------------===// - -/// \fn void clip(T Val) -/// \brief Discards the current pixel if the specified value is less than zero. -/// \param Val The input value. - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clip) -void clip(float); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clip) -void clip(float2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clip) -void clip(float3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clip) -void clip(float4); - -//===----------------------------------------------------------------------===// -// cos builtins -//===----------------------------------------------------------------------===// - -/// \fn T cos(T Val) -/// \brief Returns the cosine of the input value, \a Val. -/// \param Val The input value. - -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cos) -half cos(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cos) -half2 cos(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cos) -half3 cos(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cos) -half4 cos(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cos) -float cos(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cos) -float2 cos(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cos) -float3 cos(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cos) -float4 cos(float4); - -//===----------------------------------------------------------------------===// -// cosh builtins -//===----------------------------------------------------------------------===// - -/// \fn T cosh(T Val) -/// \brief Returns the hyperbolic cosine of the input value, \a Val. -/// \param Val The input value. 
- -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh) -half cosh(half); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh) -half2 cosh(half2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh) -half3 cosh(half3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh) -half4 cosh(half4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh) -float cosh(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh) -float2 cosh(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh) -float3 cosh(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh) -float4 cosh(float4); - -//===----------------------------------------------------------------------===// -// count bits builtins -//===----------------------------------------------------------------------===// - -/// \fn T countbits(T Val) -/// \brief Return the number of bits (per component) set in the input integer. -/// \param Val The input value. - -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_AVAILABILITY(shadermodel, 6.2) -const inline uint countbits(int16_t x) { - return __builtin_elementwise_popcount(x); -} -_HLSL_AVAILABILITY(shadermodel, 6.2) -const inline uint2 countbits(int16_t2 x) { - return __builtin_elementwise_popcount(x); -} -_HLSL_AVAILABILITY(shadermodel, 6.2) -const inline uint3 countbits(int16_t3 x) { - return __builtin_elementwise_popcount(x); -} -_HLSL_AVAILABILITY(shadermodel, 6.2) -const inline uint4 countbits(int16_t4 x) { - return __builtin_elementwise_popcount(x); -} -_HLSL_AVAILABILITY(shadermodel, 6.2) -const inline uint countbits(uint16_t x) { - return __builtin_elementwise_popcount(x); -} -_HLSL_AVAILABILITY(shadermodel, 6.2) -const inline uint2 countbits(uint16_t2 x) { - return __builtin_elementwise_popcount(x); -} -_HLSL_AVAILABILITY(shadermodel, 6.2) -const inline uint3 countbits(uint16_t3 x) { - return __builtin_elementwise_popcount(x); -} -_HLSL_AVAILABILITY(shadermodel, 6.2) -const inline uint4 countbits(uint16_t4 x) { - return __builtin_elementwise_popcount(x); -} 
-#endif - -const inline uint countbits(int x) { return __builtin_elementwise_popcount(x); } -const inline uint2 countbits(int2 x) { - return __builtin_elementwise_popcount(x); -} -const inline uint3 countbits(int3 x) { - return __builtin_elementwise_popcount(x); -} -const inline uint4 countbits(int4 x) { - return __builtin_elementwise_popcount(x); -} - -const inline uint countbits(uint x) { - return __builtin_elementwise_popcount(x); -} -const inline uint2 countbits(uint2 x) { - return __builtin_elementwise_popcount(x); -} -const inline uint3 countbits(uint3 x) { - return __builtin_elementwise_popcount(x); -} -const inline uint4 countbits(uint4 x) { - return __builtin_elementwise_popcount(x); -} - -const inline uint countbits(int64_t x) { - return __builtin_elementwise_popcount(x); -} -const inline uint2 countbits(int64_t2 x) { - return __builtin_elementwise_popcount(x); -} -const inline uint3 countbits(int64_t3 x) { - return __builtin_elementwise_popcount(x); -} -const inline uint4 countbits(int64_t4 x) { - return __builtin_elementwise_popcount(x); -} - -const inline uint countbits(uint64_t x) { - return __builtin_elementwise_popcount(x); -} -const inline uint2 countbits(uint64_t2 x) { - return __builtin_elementwise_popcount(x); -} -const inline uint3 countbits(uint64_t3 x) { - return __builtin_elementwise_popcount(x); -} -const inline uint4 countbits(uint64_t4 x) { - return __builtin_elementwise_popcount(x); -} - -//===----------------------------------------------------------------------===// -// degrees builtins -//===----------------------------------------------------------------------===// - -/// \fn T degrees(T x) -/// \brief Converts the specified value from radians to degrees. -/// \param x The specified input value. 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_degrees) -half degrees(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_degrees) -half2 degrees(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_degrees) -half3 degrees(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_degrees) -half4 degrees(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_degrees) -float degrees(float); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_degrees) -float2 degrees(float2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_degrees) -float3 degrees(float3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_degrees) -float4 degrees(float4); - //===----------------------------------------------------------------------===// // distance builtins //===----------------------------------------------------------------------===// @@ -942,1140 +110,73 @@ const inline float distance(vector X, vector Y) { } //===----------------------------------------------------------------------===// -// dot product builtins +// length builtins //===----------------------------------------------------------------------===// -/// \fn K dot(T X, T Y) -/// \brief Return the dot product (a scalar value) of \a X and \a Y. -/// \param X The X input value. -/// \param Y The Y input value. +/// \fn T length(T x) +/// \brief Returns the length of the specified floating-point vector. +/// \param x [in] The vector of floats, or a scalar float. +/// +/// Length is based on the following formula: sqrt(x[0]^2 + x[1]^2 + ...). 
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -half dot(half, half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -half dot(half2, half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -half dot(half3, half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -half dot(half4, half4); - -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int16_t dot(int16_t, int16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int16_t dot(int16_t2, int16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int16_t dot(int16_t3, int16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int16_t dot(int16_t4, int16_t4); - -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint16_t dot(uint16_t, uint16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint16_t dot(uint16_t2, uint16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint16_t dot(uint16_t3, uint16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint16_t dot(uint16_t4, uint16_t4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -float dot(float, float); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -float dot(float2, float2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -float dot(float3, float3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -float dot(float4, float4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -double dot(double, double); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int dot(int, int); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int dot(int2, int2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int dot(int3, int3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int dot(int4, int4); - 
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint dot(uint, uint); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint dot(uint2, uint2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint dot(uint3, uint3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint dot(uint4, uint4); +const inline half length(half X) { return __detail::length_impl(X); } +const inline float length(float X) { return __detail::length_impl(X); } -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int64_t dot(int64_t, int64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int64_t dot(int64_t2, int64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int64_t dot(int64_t3, int64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int64_t dot(int64_t4, int64_t4); +template +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +const inline half length(vector X) { + return __detail::length_vec_impl(X); +} -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint64_t dot(uint64_t, uint64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint64_t dot(uint64_t2, uint64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint64_t dot(uint64_t3, uint64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint64_t dot(uint64_t4, uint64_t4); +template const inline float length(vector X) { + return __detail::length_vec_impl(X); +} //===----------------------------------------------------------------------===// -// dot4add builtins +// D3DCOLORtoUBYTE4 builtin //===----------------------------------------------------------------------===// -/// \fn int dot4add_i8packed(uint A, uint B, int C) - -_HLSL_AVAILABILITY(shadermodel, 6.4) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot4add_i8packed) -int dot4add_i8packed(uint, uint, int); - -/// \fn uint dot4add_u8packed(uint A, uint B, uint C) +/// \fn T D3DCOLORtoUBYTE4(T x) +/// \brief Converts a floating-point, 4D vector set by a D3DCOLOR to a UBYTE4. +/// \param x [in] The floating-point vector4 to convert. +/// +/// The return value is the UBYTE4 representation of the \a x parameter. 
+/// +/// This function swizzles and scales components of the \a x parameter. Use this +/// function to compensate for the lack of UBYTE4 support in some hardware. -_HLSL_AVAILABILITY(shadermodel, 6.4) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot4add_u8packed) -uint dot4add_u8packed(uint, uint, uint); +constexpr vector D3DCOLORtoUBYTE4(vector V) { + return __detail::d3d_color_to_ubyte4_impl(V); +} //===----------------------------------------------------------------------===// -// exp builtins +// reflect builtin //===----------------------------------------------------------------------===// -/// \fn T exp(T x) -/// \brief Returns the base-e exponential, or \a e**x, of the specified value. -/// \param x The specified input value. +/// \fn T reflect(T I, T N) +/// \brief Returns a reflection using an incident ray, \a I, and a surface +/// normal, \a N. +/// \param I The incident ray. +/// \param N The surface normal. +/// +/// The return value is a floating-point vector that represents the reflection +/// of the incident ray, \a I, off a surface with the normal \a N. +/// +/// This function calculates the reflection vector using the following formula: +/// V = I - 2 * N * dot(I N) . +/// +/// N must already be normalized in order to achieve the desired result. /// -/// The return value is the base-e exponential of the \a x parameter. +/// The operands must all be a scalar or vector whose component type is +/// floating-point. +/// +/// Result type and the type of all operands must be the same type. 
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) -half exp(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) -half2 exp(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) -half3 exp(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) -half4 exp(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) -float exp(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) -float2 exp(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) -float3 exp(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) -float4 exp(float4); - -//===----------------------------------------------------------------------===// -// exp2 builtins -//===----------------------------------------------------------------------===// - -/// \fn T exp2(T x) -/// \brief Returns the base 2 exponential, or \a 2**x, of the specified value. -/// \param x The specified input value. -/// -/// The base 2 exponential of the \a x parameter. 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) -half exp2(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) -half2 exp2(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) -half3 exp2(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) -half4 exp2(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) -float exp2(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) -float2 exp2(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) -float3 exp2(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) -float4 exp2(float4); - -//===----------------------------------------------------------------------===// -// firstbithigh builtins -//===----------------------------------------------------------------------===// - -/// \fn T firstbithigh(T Val) -/// \brief Returns the location of the first set bit starting from the highest -/// order bit and working downward, per component. -/// \param Val the input value. 
- -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(int16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(int16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(int16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(int16_t4); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(uint16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(uint16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(uint16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(uint16_t4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(int); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(int2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(int3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(int4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(uint); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(uint2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(uint3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(uint4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(int64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(int64_t2); 
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(int64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(int64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(uint64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(uint64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(uint64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(uint64_t4); - -//===----------------------------------------------------------------------===// -// firstbitlow builtins -//===----------------------------------------------------------------------===// - -/// \fn T firstbitlow(T Val) -/// \brief Returns the location of the first set bit starting from the lowest -/// order bit and working upward, per component. -/// \param Val the input value. - -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint firstbitlow(int16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint2 firstbitlow(int16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint3 firstbitlow(int16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint4 firstbitlow(int16_t4); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint firstbitlow(uint16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint2 firstbitlow(uint16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint3 firstbitlow(uint16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint4 
firstbitlow(uint16_t4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint firstbitlow(int); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint2 firstbitlow(int2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint3 firstbitlow(int3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint4 firstbitlow(int4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint firstbitlow(uint); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint2 firstbitlow(uint2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint3 firstbitlow(uint3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint4 firstbitlow(uint4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint firstbitlow(int64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint2 firstbitlow(int64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint3 firstbitlow(int64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint4 firstbitlow(int64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint firstbitlow(uint64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint2 firstbitlow(uint64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint3 firstbitlow(uint64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint4 firstbitlow(uint64_t4); - -//===----------------------------------------------------------------------===// -// floor builtins -//===----------------------------------------------------------------------===// - -/// \fn T floor(T Val) -/// \brief Returns the largest integer that is less than or equal to the input -/// value, \a Val. -/// \param Val The input value. 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) -half floor(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) -half2 floor(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) -half3 floor(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) -half4 floor(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) -float floor(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) -float2 floor(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) -float3 floor(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) -float4 floor(float4); - -//===----------------------------------------------------------------------===// -// fmod builtins -//===----------------------------------------------------------------------===// - -/// \fn T fmod(T x, T y) -/// \brief Returns the linear interpolation of x to y. -/// \param x [in] The dividend. -/// \param y [in] The divisor. -/// -/// Return the floating-point remainder of the x parameter divided by the y -/// parameter. 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod) -half fmod(half, half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod) -half2 fmod(half2, half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod) -half3 fmod(half3, half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod) -half4 fmod(half4, half4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod) -float fmod(float, float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod) -float2 fmod(float2, float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod) -float3 fmod(float3, float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod) -float4 fmod(float4, float4); - -//===----------------------------------------------------------------------===// -// frac builtins -//===----------------------------------------------------------------------===// - -/// \fn T frac(T x) -/// \brief Returns the fractional (or decimal) part of x. \a x parameter. -/// \param x The specified input value. -/// -/// If \a the return value is greater than or equal to 0 and less than 1. 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) -half frac(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) -half2 frac(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) -half3 frac(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) -half4 frac(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) -float frac(float); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) -float2 frac(float2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) -float3 frac(float3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) -float4 frac(float4); - -//===----------------------------------------------------------------------===// -// isinf builtins -//===----------------------------------------------------------------------===// - -/// \fn T isinf(T x) -/// \brief Determines if the specified value \a x is infinite. -/// \param x The specified input value. -/// -/// Returns a value of the same size as the input, with a value set -/// to True if the x parameter is +INF or -INF. Otherwise, False. 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf) -bool isinf(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf) -bool2 isinf(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf) -bool3 isinf(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf) -bool4 isinf(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf) -bool isinf(float); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf) -bool2 isinf(float2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf) -bool3 isinf(float3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf) -bool4 isinf(float4); - -//===----------------------------------------------------------------------===// -// lerp builtins -//===----------------------------------------------------------------------===// - -/// \fn T lerp(T x, T y, T s) -/// \brief Returns the linear interpolation of x to y by s. -/// \param x [in] The first-floating point value. -/// \param y [in] The second-floating point value. -/// \param s [in] A value that linearly interpolates between the x parameter and -/// the y parameter. -/// -/// Linear interpolation is based on the following formula: x*(1-s) + y*s which -/// can equivalently be written as x + s(y-x). 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) -half lerp(half, half, half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) -half2 lerp(half2, half2, half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) -half3 lerp(half3, half3, half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) -half4 lerp(half4, half4, half4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) -float lerp(float, float, float); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) -float2 lerp(float2, float2, float2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) -float3 lerp(float3, float3, float3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) -float4 lerp(float4, float4, float4); - -//===----------------------------------------------------------------------===// -// length builtins -//===----------------------------------------------------------------------===// - -/// \fn T length(T x) -/// \brief Returns the length of the specified floating-point vector. -/// \param x [in] The vector of floats, or a scalar float. -/// -/// Length is based on the following formula: sqrt(x[0]^2 + x[1]^2 + ...). - -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -const inline half length(half X) { return __detail::length_impl(X); } -const inline float length(float X) { return __detail::length_impl(X); } - -template -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -const inline half length(vector X) { - return __detail::length_vec_impl(X); -} - -template const inline float length(vector X) { - return __detail::length_vec_impl(X); -} - -//===----------------------------------------------------------------------===// -// log builtins -//===----------------------------------------------------------------------===// - -/// \fn T log(T Val) -/// \brief The base-e logarithm of the input value, \a Val parameter. -/// \param Val The input value. 
-/// -/// If \a Val is negative, this result is undefined. If \a Val is 0, this -/// function returns negative infinity. - -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log) -half log(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log) -half2 log(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log) -half3 log(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log) -half4 log(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log) -float log(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log) -float2 log(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log) -float3 log(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log) -float4 log(float4); - -//===----------------------------------------------------------------------===// -// log10 builtins -//===----------------------------------------------------------------------===// - -/// \fn T log10(T Val) -/// \brief The base-10 logarithm of the input value, \a Val parameter. -/// \param Val The input value. -/// -/// If \a Val is negative, this result is undefined. If \a Val is 0, this -/// function returns negative infinity. 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log10) -half log10(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log10) -half2 log10(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log10) -half3 log10(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log10) -half4 log10(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log10) -float log10(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log10) -float2 log10(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log10) -float3 log10(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log10) -float4 log10(float4); - -//===----------------------------------------------------------------------===// -// log2 builtins -//===----------------------------------------------------------------------===// - -/// \fn T log2(T Val) -/// \brief The base-2 logarithm of the input value, \a Val parameter. -/// \param Val The input value. -/// -/// If \a Val is negative, this result is undefined. If \a Val is 0, this -/// function returns negative infinity. 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log2) -half log2(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log2) -half2 log2(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log2) -half3 log2(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log2) -half4 log2(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log2) -float log2(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log2) -float2 log2(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log2) -float3 log2(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_log2) -float4 log2(float4); - -//===----------------------------------------------------------------------===// -// mad builtins -//===----------------------------------------------------------------------===// - -/// \fn T mad(T M, T A, T B) -/// \brief The result of \a M * \a A + \a B. -/// \param M The multiplication value. -/// \param A The first addition value. -/// \param B The second addition value. 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -half mad(half, half, half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -half2 mad(half2, half2, half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -half3 mad(half3, half3, half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -half4 mad(half4, half4, half4); - -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -int16_t mad(int16_t, int16_t, int16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -int16_t2 mad(int16_t2, int16_t2, int16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -int16_t3 mad(int16_t3, int16_t3, int16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -int16_t4 mad(int16_t4, int16_t4, int16_t4); - -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -uint16_t mad(uint16_t, uint16_t, uint16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -uint16_t2 mad(uint16_t2, uint16_t2, uint16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -uint16_t3 mad(uint16_t3, uint16_t3, uint16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -uint16_t4 mad(uint16_t4, uint16_t4, uint16_t4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -int mad(int, int, int); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -int2 mad(int2, int2, int2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -int3 mad(int3, int3, int3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -int4 mad(int4, int4, int4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -uint mad(uint, uint, uint); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -uint2 mad(uint2, uint2, uint2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -uint3 mad(uint3, 
uint3, uint3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -uint4 mad(uint4, uint4, uint4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -int64_t mad(int64_t, int64_t, int64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -int64_t2 mad(int64_t2, int64_t2, int64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -int64_t3 mad(int64_t3, int64_t3, int64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -int64_t4 mad(int64_t4, int64_t4, int64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -uint64_t mad(uint64_t, uint64_t, uint64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -uint64_t2 mad(uint64_t2, uint64_t2, uint64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -uint64_t3 mad(uint64_t3, uint64_t3, uint64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -uint64_t4 mad(uint64_t4, uint64_t4, uint64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -float mad(float, float, float); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -float2 mad(float2, float2, float2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -float3 mad(float3, float3, float3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -float4 mad(float4, float4, float4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -double mad(double, double, double); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -double2 mad(double2, double2, double2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -double3 mad(double3, double3, double3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_mad) -double4 mad(double4, double4, double4); - -//===----------------------------------------------------------------------===// -// max builtins -//===----------------------------------------------------------------------===// - -/// \fn T max(T X, T Y) -/// \brief Return the greater of \a X and \a Y. -/// \param X The X input value. -/// \param Y The Y input value. 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -half max(half, half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -half2 max(half2, half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -half3 max(half3, half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -half4 max(half4, half4); - -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -int16_t max(int16_t, int16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -int16_t2 max(int16_t2, int16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -int16_t3 max(int16_t3, int16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -int16_t4 max(int16_t4, int16_t4); - -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -uint16_t max(uint16_t, uint16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -uint16_t2 max(uint16_t2, uint16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -uint16_t3 max(uint16_t3, uint16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -uint16_t4 max(uint16_t4, uint16_t4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -int max(int, int); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -int2 max(int2, int2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -int3 max(int3, int3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -int4 max(int4, int4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -uint max(uint, uint); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -uint2 max(uint2, uint2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -uint3 max(uint3, uint3); 
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -uint4 max(uint4, uint4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -int64_t max(int64_t, int64_t); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -int64_t2 max(int64_t2, int64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -int64_t3 max(int64_t3, int64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -int64_t4 max(int64_t4, int64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -uint64_t max(uint64_t, uint64_t); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -uint64_t2 max(uint64_t2, uint64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -uint64_t3 max(uint64_t3, uint64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -uint64_t4 max(uint64_t4, uint64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -float max(float, float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -float2 max(float2, float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -float3 max(float3, float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -float4 max(float4, float4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -double max(double, double); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -double2 max(double2, double2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -double3 max(double3, double3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) -double4 max(double4, double4); - -//===----------------------------------------------------------------------===// -// min builtins -//===----------------------------------------------------------------------===// - -/// \fn T min(T X, T Y) -/// \brief Return the lesser of \a X and \a Y. -/// \param X The X input value. -/// \param Y The Y input value. 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -half min(half, half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -half2 min(half2, half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -half3 min(half3, half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -half4 min(half4, half4); - -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -int16_t min(int16_t, int16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -int16_t2 min(int16_t2, int16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -int16_t3 min(int16_t3, int16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -int16_t4 min(int16_t4, int16_t4); - -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -uint16_t min(uint16_t, uint16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -uint16_t2 min(uint16_t2, uint16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -uint16_t3 min(uint16_t3, uint16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -uint16_t4 min(uint16_t4, uint16_t4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -int min(int, int); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -int2 min(int2, int2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -int3 min(int3, int3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -int4 min(int4, int4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -uint min(uint, uint); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -uint2 min(uint2, uint2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -uint3 min(uint3, uint3); 
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -uint4 min(uint4, uint4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -float min(float, float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -float2 min(float2, float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -float3 min(float3, float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -float4 min(float4, float4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -int64_t min(int64_t, int64_t); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -int64_t2 min(int64_t2, int64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -int64_t3 min(int64_t3, int64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -int64_t4 min(int64_t4, int64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -uint64_t min(uint64_t, uint64_t); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -uint64_t2 min(uint64_t2, uint64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -uint64_t3 min(uint64_t3, uint64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -uint64_t4 min(uint64_t4, uint64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -double min(double, double); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -double2 min(double2, double2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -double3 min(double3, double3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) -double4 min(double4, double4); - -//===----------------------------------------------------------------------===// -// normalize builtins -//===----------------------------------------------------------------------===// - -/// \fn T normalize(T x) -/// \brief Returns the normalized unit vector of the specified floating-point -/// vector. \param x [in] The vector of floats. -/// -/// Normalize is based on the following formula: x / length(x). 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_normalize) -half normalize(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_normalize) -half2 normalize(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_normalize) -half3 normalize(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_normalize) -half4 normalize(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_normalize) -float normalize(float); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_normalize) -float2 normalize(float2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_normalize) -float3 normalize(float3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_normalize) -float4 normalize(float4); - -//===----------------------------------------------------------------------===// -// pow builtins -//===----------------------------------------------------------------------===// - -/// \fn T pow(T Val, T Pow) -/// \brief Return the value \a Val, raised to the power \a Pow. -/// \param Val The input value. -/// \param Pow The specified power. 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_pow) -half pow(half, half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_pow) -half2 pow(half2, half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_pow) -half3 pow(half3, half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_pow) -half4 pow(half4, half4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_pow) -float pow(float, float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_pow) -float2 pow(float2, float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_pow) -float3 pow(float3, float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_pow) -float4 pow(float4, float4); - -//===----------------------------------------------------------------------===// -// reversebits builtins -//===----------------------------------------------------------------------===// - -/// \fn T reversebits(T Val) -/// \brief Return the value \a Val with the bit order reversed. -/// \param Val The input value. 
- -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) -uint16_t reversebits(uint16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) -uint16_t2 reversebits(uint16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) -uint16_t3 reversebits(uint16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) -uint16_t4 reversebits(uint16_t4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) -uint reversebits(uint); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) -uint2 reversebits(uint2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) -uint3 reversebits(uint3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) -uint4 reversebits(uint4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) -uint64_t reversebits(uint64_t); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) -uint64_t2 reversebits(uint64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) -uint64_t3 reversebits(uint64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) -uint64_t4 reversebits(uint64_t4); - -//===----------------------------------------------------------------------===// -// cross builtins -//===----------------------------------------------------------------------===// - -/// \fn T cross(T x, T y) -/// \brief Returns the cross product of two floating-point, 3D vectors. -/// \param x [in] The first floating-point, 3D vector. -/// \param y [in] The second floating-point, 3D vector. 
-/// -/// Result is the cross product of x and y, i.e., the resulting -/// components are, in order : -/// x[1] * y[2] - y[1] * x[2] -/// x[2] * y[0] - y[2] * x[0] -/// x[0] * y[1] - y[0] * x[1] - -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_cross) -half3 cross(half3, half3); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_cross) -float3 cross(float3, float3); - -//===----------------------------------------------------------------------===// -// D3DCOLORtoUBYTE4 builtin -//===----------------------------------------------------------------------===// - -/// \fn T D3DCOLORtoUBYTE4(T x) -/// \brief Converts a floating-point, 4D vector set by a D3DCOLOR to a UBYTE4. -/// \param x [in] The floating-point vector4 to convert. -/// -/// The return value is the UBYTE4 representation of the \a x parameter. -/// -/// This function swizzles and scales components of the \a x parameter. Use this -/// function to compensate for the lack of UBYTE4 support in some hardware. - -constexpr vector D3DCOLORtoUBYTE4(vector V) { - return __detail::d3d_color_to_ubyte4_impl(V); -} - -//===----------------------------------------------------------------------===// -// rcp builtins -//===----------------------------------------------------------------------===// - -/// \fn T rcp(T x) -/// \brief Calculates a fast, approximate, per-component reciprocal ie 1 / \a x. -/// \param x The specified input value. -/// -/// The return value is the reciprocal of the \a x parameter. 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) -half rcp(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) -half2 rcp(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) -half3 rcp(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) -half4 rcp(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) -float rcp(float); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) -float2 rcp(float2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) -float3 rcp(float3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) -float4 rcp(float4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) -double rcp(double); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) -double2 rcp(double2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) -double3 rcp(double3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp) -double4 rcp(double4); - -//===----------------------------------------------------------------------===// -// reflect builtin -//===----------------------------------------------------------------------===// - -/// \fn T reflect(T I, T N) -/// \brief Returns a reflection using an incident ray, \a I, and a surface -/// normal, \a N. -/// \param I The incident ray. -/// \param N The surface normal. -/// -/// The return value is a floating-point vector that represents the reflection -/// of the incident ray, \a I, off a surface with the normal \a N. -/// -/// This function calculates the reflection vector using the following formula: -/// V = I - 2 * N * dot(I N) . -/// -/// N must already be normalized in order to achieve the desired result. -/// -/// The operands must all be a scalar or vector whose component type is -/// floating-point. -/// -/// Result type and the type of all operands must be the same type. 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -const inline half reflect(half I, half N) { - return __detail::reflect_impl(I, N); -} +const inline half reflect(half I, half N) { + return __detail::reflect_impl(I, N); +} const inline float reflect(float I, float N) { return __detail::reflect_impl(I, N); @@ -2091,808 +192,5 @@ template const inline vector reflect(vector I, vector N) { return __detail::reflect_vec_impl(I, N); } - -//===----------------------------------------------------------------------===// -// rsqrt builtins -//===----------------------------------------------------------------------===// - -/// \fn T rsqrt(T x) -/// \brief Returns the reciprocal of the square root of the specified value. -/// ie 1 / sqrt( \a x). -/// \param x The specified input value. -/// -/// This function uses the following formula: 1 / sqrt(x). - -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt) -half rsqrt(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt) -half2 rsqrt(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt) -half3 rsqrt(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt) -half4 rsqrt(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt) -float rsqrt(float); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt) -float2 rsqrt(float2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt) -float3 rsqrt(float3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt) -float4 rsqrt(float4); - -//===----------------------------------------------------------------------===// -// round builtins -//===----------------------------------------------------------------------===// - -/// \fn T round(T x) -/// \brief Rounds the specified value \a x to the nearest integer. -/// \param x The specified input value. 
-/// -/// The return value is the \a x parameter, rounded to the nearest integer -/// within a floating-point type. Halfway cases are -/// rounded to the nearest even value. - -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_roundeven) -half round(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_roundeven) -half2 round(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_roundeven) -half3 round(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_roundeven) -half4 round(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_roundeven) -float round(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_roundeven) -float2 round(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_roundeven) -float3 round(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_roundeven) -float4 round(float4); - -//===----------------------------------------------------------------------===// -// saturate builtins -//===----------------------------------------------------------------------===// - -/// \fn T saturate(T Val) -/// \brief Returns input value, \a Val, clamped within the range of 0.0f -/// to 1.0f. \param Val The input value. 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) -half saturate(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) -half2 saturate(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) -half3 saturate(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) -half4 saturate(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) -float saturate(float); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) -float2 saturate(float2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) -float3 saturate(float3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) -float4 saturate(float4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) -double saturate(double); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) -double2 saturate(double2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) -double3 saturate(double3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) -double4 saturate(double4); - -//===----------------------------------------------------------------------===// -// select builtins -//===----------------------------------------------------------------------===// - -/// \fn T select(bool Cond, T TrueVal, T FalseVal) -/// \brief ternary operator. -/// \param Cond The Condition input value. -/// \param TrueVal The Value returned if Cond is true. -/// \param FalseVal The Value returned if Cond is false. - -template -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_select) -T select(bool, T, T); - -/// \fn vector select(vector Conds, vector TrueVals, -/// vector FalseVals) -/// \brief ternary operator for vectors. All vectors must be the same size. -/// \param Conds The Condition input values. -/// \param TrueVals The vector values are chosen from when conditions are true. 
-/// \param FalseVals The vector values are chosen from when conditions are -/// false. - -template -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_select) -vector select(vector, vector, vector); - -//===----------------------------------------------------------------------===// -// sin builtins -//===----------------------------------------------------------------------===// - -/// \fn T sin(T Val) -/// \brief Returns the sine of the input value, \a Val. -/// \param Val The input value. - -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sin) -half sin(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sin) -half2 sin(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sin) -half3 sin(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sin) -half4 sin(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sin) -float sin(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sin) -float2 sin(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sin) -float3 sin(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sin) -float4 sin(float4); - -//===----------------------------------------------------------------------===// -// sinh builtins -//===----------------------------------------------------------------------===// - -/// \fn T sinh(T Val) -/// \brief Returns the hyperbolic sine of the input value, \a Val. -/// \param Val The input value. 
- -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sinh) -half sinh(half); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sinh) -half2 sinh(half2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sinh) -half3 sinh(half3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sinh) -half4 sinh(half4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sinh) -float sinh(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sinh) -float2 sinh(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sinh) -float3 sinh(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sinh) -float4 sinh(float4); - -//===----------------------------------------------------------------------===// -// sqrt builtins -//===----------------------------------------------------------------------===// - -/// \fn T sqrt(T Val) -/// \brief Returns the square root of the input value, \a Val. -/// \param Val The input value. - -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt) -half sqrt(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt) -half2 sqrt(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt) -half3 sqrt(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt) -half4 sqrt(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt) -float sqrt(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt) -float2 sqrt(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt) -float3 sqrt(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt) -float4 sqrt(float4); - -//===----------------------------------------------------------------------===// -// step builtins -//===----------------------------------------------------------------------===// - -/// \fn T step(T x, T y) -/// \brief Returns 1 if the x parameter is greater than or equal to the y -/// parameter; otherwise, 0. vector. 
\param x [in] The first floating-point -/// value to compare. \param y [in] The first floating-point value to compare. -/// -/// Step is based on the following formula: (x >= y) ? 1 : 0 - -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) -half step(half, half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) -half2 step(half2, half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) -half3 step(half3, half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) -half4 step(half4, half4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) -float step(float, float); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) -float2 step(float2, float2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) -float3 step(float3, float3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) -float4 step(float4, float4); - -//===----------------------------------------------------------------------===// -// tan builtins -//===----------------------------------------------------------------------===// - -/// \fn T tan(T Val) -/// \brief Returns the tangent of the input value, \a Val. -/// \param Val The input value. 
- -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan) -half tan(half); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan) -half2 tan(half2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan) -half3 tan(half3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan) -half4 tan(half4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan) -float tan(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan) -float2 tan(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan) -float3 tan(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan) -float4 tan(float4); - -//===----------------------------------------------------------------------===// -// tanh builtins -//===----------------------------------------------------------------------===// - -/// \fn T tanh(T Val) -/// \brief Returns the hyperbolic tangent of the input value, \a Val. -/// \param Val The input value. - -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tanh) -half tanh(half); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tanh) -half2 tanh(half2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tanh) -half3 tanh(half3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tanh) -half4 tanh(half4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tanh) -float tanh(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tanh) -float2 tanh(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tanh) -float3 tanh(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tanh) -float4 tanh(float4); - -//===----------------------------------------------------------------------===// -// trunc builtins -//===----------------------------------------------------------------------===// - -/// \fn T trunc(T Val) -/// \brief Returns the truncated integer value of the input value, \a Val. -/// \param Val The input value. 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_trunc) -half trunc(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_trunc) -half2 trunc(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_trunc) -half3 trunc(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_trunc) -half4 trunc(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_trunc) -float trunc(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_trunc) -float2 trunc(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_trunc) -float3 trunc(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_trunc) -float4 trunc(float4); - -//===----------------------------------------------------------------------===// -// Wave* builtins -//===----------------------------------------------------------------------===// - -/// \brief Returns true if the expression is true in all active lanes in the -/// current wave. -/// -/// \param Val The boolean expression to evaluate. -/// \return True if the expression is true in all lanes. -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_all_true) -__attribute__((convergent)) bool WaveActiveAllTrue(bool Val); - -/// \brief Returns true if the expression is true in any active lane in the -/// current wave. -/// -/// \param Val The boolean expression to evaluate. -/// \return True if the expression is true in any lane. -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_any_true) -__attribute__((convergent)) bool WaveActiveAnyTrue(bool Val); - -/// \brief Counts the number of boolean variables which evaluate to true across -/// all active lanes in the current wave. -/// -/// \param Val The input boolean value. -/// \return The number of lanes for which the boolean variable evaluates to -/// true, across all active lanes in the current wave. 
-_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_count_bits) -__attribute__((convergent)) uint WaveActiveCountBits(bool Val); - -/// \brief Returns the index of the current lane within the current wave. -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_get_lane_index) -__attribute__((convergent)) uint WaveGetLaneIndex(); - -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_is_first_lane) -__attribute__((convergent)) bool WaveIsFirstLane(); - -//===----------------------------------------------------------------------===// -// WaveReadLaneAt builtins -//===----------------------------------------------------------------------===// - -// \brief Returns the value of the expression for the given lane index within -// the specified wave. - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) bool WaveReadLaneAt(bool, int32_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) bool2 WaveReadLaneAt(bool2, int32_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) bool3 WaveReadLaneAt(bool3, int32_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) bool4 WaveReadLaneAt(bool4, int32_t); - -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) int16_t WaveReadLaneAt(int16_t, int32_t); -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) int16_t2 WaveReadLaneAt(int16_t2, int32_t); -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) int16_t3 WaveReadLaneAt(int16_t3, int32_t); -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) int16_t4 
WaveReadLaneAt(int16_t4, int32_t); -#endif - -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) half WaveReadLaneAt(half, int32_t); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) half2 WaveReadLaneAt(half2, int32_t); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) half3 WaveReadLaneAt(half3, int32_t); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) half4 WaveReadLaneAt(half4, int32_t); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) int WaveReadLaneAt(int, int32_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) int2 WaveReadLaneAt(int2, int32_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) int3 WaveReadLaneAt(int3, int32_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) int4 WaveReadLaneAt(int4, int32_t); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) float WaveReadLaneAt(float, int32_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) float2 WaveReadLaneAt(float2, int32_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) float3 WaveReadLaneAt(float3, int32_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) float4 WaveReadLaneAt(float4, int32_t); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) int64_t WaveReadLaneAt(int64_t, int32_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) int64_t2 WaveReadLaneAt(int64_t2, int32_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) 
-__attribute__((convergent)) int64_t3 WaveReadLaneAt(int64_t3, int32_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) int64_t4 WaveReadLaneAt(int64_t4, int32_t); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) double WaveReadLaneAt(double, int32_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) double2 WaveReadLaneAt(double2, int32_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) double3 WaveReadLaneAt(double3, int32_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at) -__attribute__((convergent)) double4 WaveReadLaneAt(double4, int32_t); - -//===----------------------------------------------------------------------===// -// WaveActiveMax builtins -//===----------------------------------------------------------------------===// - -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) half WaveActiveMax(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) half2 WaveActiveMax(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) half3 WaveActiveMax(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) half4 WaveActiveMax(half4); - -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) int16_t WaveActiveMax(int16_t); -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) int16_t2 WaveActiveMax(int16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) int16_t3 
WaveActiveMax(int16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) int16_t4 WaveActiveMax(int16_t4); - -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) uint16_t WaveActiveMax(uint16_t); -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) uint16_t2 WaveActiveMax(uint16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) uint16_t3 WaveActiveMax(uint16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) uint16_t4 WaveActiveMax(uint16_t4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) int WaveActiveMax(int); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) int2 WaveActiveMax(int2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) int3 WaveActiveMax(int3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) int4 WaveActiveMax(int4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) uint WaveActiveMax(uint); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) uint2 WaveActiveMax(uint2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) uint3 WaveActiveMax(uint3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) uint4 WaveActiveMax(uint4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) int64_t WaveActiveMax(int64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) int64_t2 WaveActiveMax(int64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) 
int64_t3 WaveActiveMax(int64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) int64_t4 WaveActiveMax(int64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) uint64_t WaveActiveMax(uint64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) uint64_t2 WaveActiveMax(uint64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) uint64_t3 WaveActiveMax(uint64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) uint64_t4 WaveActiveMax(uint64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) float WaveActiveMax(float); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) float2 WaveActiveMax(float2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) float3 WaveActiveMax(float3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) float4 WaveActiveMax(float4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) double WaveActiveMax(double); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) double2 WaveActiveMax(double2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) double3 WaveActiveMax(double3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max) -__attribute__((convergent)) double4 WaveActiveMax(double4); - -//===----------------------------------------------------------------------===// -// WaveActiveSum builtins -//===----------------------------------------------------------------------===// - -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) half WaveActiveSum(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) 
half2 WaveActiveSum(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) half3 WaveActiveSum(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) half4 WaveActiveSum(half4); - -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) int16_t WaveActiveSum(int16_t); -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) int16_t2 WaveActiveSum(int16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) int16_t3 WaveActiveSum(int16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) int16_t4 WaveActiveSum(int16_t4); - -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) uint16_t WaveActiveSum(uint16_t); -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) uint16_t2 WaveActiveSum(uint16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) uint16_t3 WaveActiveSum(uint16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) uint16_t4 WaveActiveSum(uint16_t4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) int WaveActiveSum(int); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) int2 WaveActiveSum(int2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) int3 WaveActiveSum(int3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) 
-__attribute__((convergent)) int4 WaveActiveSum(int4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) uint WaveActiveSum(uint); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) uint2 WaveActiveSum(uint2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) uint3 WaveActiveSum(uint3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) uint4 WaveActiveSum(uint4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) int64_t WaveActiveSum(int64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) int64_t2 WaveActiveSum(int64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) int64_t3 WaveActiveSum(int64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) int64_t4 WaveActiveSum(int64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) uint64_t WaveActiveSum(uint64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) uint64_t2 WaveActiveSum(uint64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) uint64_t3 WaveActiveSum(uint64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) uint64_t4 WaveActiveSum(uint64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) float WaveActiveSum(float); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) float2 WaveActiveSum(float2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) float3 WaveActiveSum(float3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) float4 WaveActiveSum(float4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) double WaveActiveSum(double); 
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) double2 WaveActiveSum(double2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) double3 WaveActiveSum(double3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_sum) -__attribute__((convergent)) double4 WaveActiveSum(double4); - -//===----------------------------------------------------------------------===// -// sign builtins -//===----------------------------------------------------------------------===// - -/// \fn T sign(T Val) -/// \brief Returns -1 if \a Val is less than zero; 0 if \a Val equals zero; and -/// 1 if \a Val is greater than zero. \param Val The input value. - -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int sign(int16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int2 sign(int16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int3 sign(int16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int4 sign(int16_t4); - -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int sign(uint16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int2 sign(uint16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int3 sign(uint16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int4 sign(uint16_t4); -#endif - -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int sign(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int2 sign(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int3 
sign(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int4 sign(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int sign(int); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int2 sign(int2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int3 sign(int3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int4 sign(int4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int sign(uint); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int2 sign(uint2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int3 sign(uint3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int4 sign(uint4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int sign(float); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int2 sign(float2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int3 sign(float3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int4 sign(float4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int sign(int64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int2 sign(int64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int3 sign(int64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int4 sign(int64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int sign(uint64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int2 sign(uint64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int3 sign(uint64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int4 sign(uint64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int sign(double); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int2 sign(double2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int3 sign(double3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign) -int4 sign(double4); - 
-//===----------------------------------------------------------------------===// -// radians builtins -//===----------------------------------------------------------------------===// - -/// \fn T radians(T Val) -/// \brief Converts the specified value from degrees to radians. - -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians) -half radians(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians) -half2 radians(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians) -half3 radians(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians) -half4 radians(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians) -float radians(float); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians) -float2 radians(float2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians) -float3 radians(float3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians) -float4 radians(float4); - -//===----------------------------------------------------------------------===// -// GroupMemoryBarrierWithGroupSync builtins -//===----------------------------------------------------------------------===// - -/// \fn void GroupMemoryBarrierWithGroupSync(void) -/// \brief Blocks execution of all threads in a group until all group shared -/// accesses have been completed and all threads in the group have reached this -/// call. 
- -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_group_memory_barrier_with_group_sync) -void GroupMemoryBarrierWithGroupSync(void); - } // namespace hlsl #endif //_HLSL_HLSL_INTRINSICS_H_ diff --git a/clang/lib/Headers/nvptxintrin.h b/clang/lib/Headers/nvptxintrin.h index 29d0adcabc82f..73eb0af8b5926 100644 --- a/clang/lib/Headers/nvptxintrin.h +++ b/clang/lib/Headers/nvptxintrin.h @@ -131,7 +131,8 @@ __gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x) { __gpu_num_lanes() - 1) << 32ull) | ((uint64_t)__nvvm_shfl_sync_idx_i32(__mask, __lo, __id, - __gpu_num_lanes() - 1)); + __gpu_num_lanes() - 1) & + 0xFFFFFFFF); } // Returns a bitmask of threads in the current lane for which \p x is true. @@ -179,8 +180,9 @@ __gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x, _DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) { // Newer targets can use the dedicated CUDA support. - if (__CUDA_ARCH__ >= 700 || __nvvm_reflect("__CUDA_ARCH") >= 700) - return __nvvm_match_any_sync_i32(__lane_mask, __x); +#if __CUDA_ARCH__ >= 700 + return __nvvm_match_any_sync_i32(__lane_mask, __x); +#endif uint32_t __match_mask = 0; bool __done = 0; @@ -200,13 +202,14 @@ __gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) { _DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_match_any_u64(uint64_t __lane_mask, uint64_t __x) { // Newer targets can use the dedicated CUDA support. 
- if (__CUDA_ARCH__ >= 700 || __nvvm_reflect("__CUDA_ARCH") >= 700) - return __nvvm_match_any_sync_i64(__lane_mask, __x); +#if __CUDA_ARCH__ >= 700 + return __nvvm_match_any_sync_i64(__lane_mask, __x); +#endif uint64_t __match_mask = 0; bool __done = 0; - while (__gpu_ballot(__lane_mask, __done)) { + while (__gpu_ballot(__lane_mask, !__done)) { if (!__done) { uint64_t __first = __gpu_read_first_lane_u64(__lane_mask, __x); if (__first == __x) { @@ -223,9 +226,10 @@ __gpu_match_any_u64(uint64_t __lane_mask, uint64_t __x) { _DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_match_all_u32(uint64_t __lane_mask, uint32_t __x) { // Newer targets can use the dedicated CUDA support. +#if __CUDA_ARCH__ >= 700 int predicate; - if (__CUDA_ARCH__ >= 700 || __nvvm_reflect("__CUDA_ARCH") >= 700) - return __nvvm_match_all_sync_i32p(__lane_mask, __x, &predicate); + return __nvvm_match_all_sync_i32p(__lane_mask, __x, &predicate); +#endif uint32_t __first = __gpu_read_first_lane_u64(__lane_mask, __x); uint64_t __ballot = __gpu_ballot(__lane_mask, __x == __first); @@ -236,9 +240,10 @@ __gpu_match_all_u32(uint64_t __lane_mask, uint32_t __x) { _DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_match_all_u64(uint64_t __lane_mask, uint64_t __x) { // Newer targets can use the dedicated CUDA support. 
+#if __CUDA_ARCH__ >= 700 int predicate; - if (__CUDA_ARCH__ >= 700 || __nvvm_reflect("__CUDA_ARCH") >= 700) - return __nvvm_match_all_sync_i64p(__lane_mask, __x, &predicate); + return __nvvm_match_all_sync_i64p(__lane_mask, __x, &predicate); +#endif uint64_t __first = __gpu_read_first_lane_u64(__lane_mask, __x); uint64_t __ballot = __gpu_ballot(__lane_mask, __x == __first); diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h index 20719b74b6b8d..8d8ef497cec49 100644 --- a/clang/lib/Headers/opencl-c.h +++ b/clang/lib/Headers/opencl-c.h @@ -15082,6 +15082,7 @@ half16 __ovld __cnfn shuffle2(half16, half16, ushort16); #pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable #endif //cl_khr_gl_msaa_sharing +#if (defined(__opencl_c_images) || defined(__IMAGE_SUPPORT__)) /** * Use the coordinate (coord.xy) to do an element lookup in * the 2D image object specified by image. @@ -16143,6 +16144,8 @@ int __ovld __cnfn get_image_num_samples(read_write image2d_array_msaa_depth_t); #endif //defined(__opencl_c_read_write_images) #endif +#endif // (defined(__opencl_c_images) || defined(__IMAGE_SUPPORT__)) + // OpenCL v2.0 s6.13.15 - Work-group Functions #if defined(__opencl_c_work_group_collective_functions) diff --git a/clang/lib/Headers/vecintrin.h b/clang/lib/Headers/vecintrin.h index a14c39f9f7313..338ea51ce8863 100644 --- a/clang/lib/Headers/vecintrin.h +++ b/clang/lib/Headers/vecintrin.h @@ -7,6 +7,9 @@ *===-----------------------------------------------------------------------=== */ +#ifndef _VECINTRIN_H +#define _VECINTRIN_H + #if defined(__s390x__) && defined(__VEC__) #define __ATTRS_ai __attribute__((__always_inline__)) @@ -12861,3 +12864,5 @@ vec_search_string_until_zero_cc(__vector unsigned int __a, #error "Use -fzvector to enable vector extensions" #endif + +#endif /* _VECINTRIN_H */ diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp index 1999d63d1aa04..5e35fa035b2b8 100644 --- 
a/clang/lib/Interpreter/DeviceOffload.cpp +++ b/clang/lib/Interpreter/DeviceOffload.cpp @@ -83,7 +83,7 @@ llvm::Expected IncrementalCUDADeviceParser::GeneratePTX() { std::error_code()); llvm::TargetOptions TO = llvm::TargetOptions(); llvm::TargetMachine *TargetMachine = Target->createTargetMachine( - PTU.TheModule->getTargetTriple(), TargetOpts.CPU, "", TO, + PTU.TheModule->getTargetTriple().str(), TargetOpts.CPU, "", TO, llvm::Reloc::Model::PIC_); PTU.TheModule->setDataLayout(TargetMachine->createDataLayout()); diff --git a/clang/lib/Interpreter/IncrementalParser.cpp b/clang/lib/Interpreter/IncrementalParser.cpp index e43cea1baf43a..41d6304bd5f65 100644 --- a/clang/lib/Interpreter/IncrementalParser.cpp +++ b/clang/lib/Interpreter/IncrementalParser.cpp @@ -41,8 +41,9 @@ llvm::Expected IncrementalParser::ParseOrWrapTopLevelDecl() { // Recover resources if we crash before exiting this method. llvm::CrashRecoveryContextCleanupRegistrar CleanupSema(&S); - Sema::GlobalEagerInstantiationScope GlobalInstantiations(S, /*Enabled=*/true); - Sema::LocalEagerInstantiationScope LocalInstantiations(S); + Sema::GlobalEagerInstantiationScope GlobalInstantiations(S, /*Enabled=*/true, + /*AtEndOfTU=*/true); + Sema::LocalEagerInstantiationScope LocalInstantiations(S, /*AtEndOfTU=*/true); // Add a new PTU. ASTContext &C = S.getASTContext(); diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp index a3b1384f0fa1d..b031571907441 100644 --- a/clang/lib/Lex/PPExpressions.cpp +++ b/clang/lib/Lex/PPExpressions.cpp @@ -257,12 +257,14 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, // preprocessor keywords and it wasn't macro expanded, it turns // into a simple 0 if (ValueLive) { - PP.Diag(PeekTok, diag::warn_pp_undef_identifier) << II; + unsigned DiagID = II->getName() == "true" + ? 
diag::warn_pp_undef_true_identifier + : diag::warn_pp_undef_identifier; + PP.Diag(PeekTok, DiagID) << II; const DiagnosticsEngine &DiagEngine = PP.getDiagnostics(); // If 'Wundef' is enabled, do not emit 'undef-prefix' diagnostics. - if (DiagEngine.isIgnored(diag::warn_pp_undef_identifier, - PeekTok.getLocation())) { + if (DiagEngine.isIgnored(DiagID, PeekTok.getLocation())) { const std::vector UndefPrefixes = DiagEngine.getDiagnosticOptions().UndefPrefixes; const StringRef IdentifierName = II->getName(); diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp index e339ca8422278..91c1619e35623 100644 --- a/clang/lib/Lex/Pragma.cpp +++ b/clang/lib/Lex/Pragma.cpp @@ -1119,11 +1119,27 @@ struct PragmaDebugHandler : public PragmaHandler { M = MM.lookupModuleQualified(IIAndLoc.first->getName(), M); if (!M) { PP.Diag(IIAndLoc.second, diag::warn_pragma_debug_unknown_module) - << IIAndLoc.first; + << IIAndLoc.first->getName(); return; } } M->dump(); + } else if (II->isStr("module_lookup")) { + Token MName; + PP.LexUnexpandedToken(MName); + auto *MNameII = MName.getIdentifierInfo(); + if (!MNameII) { + PP.Diag(MName, diag::warn_pragma_debug_missing_argument) + << II->getName(); + return; + } + Module *M = PP.getHeaderSearchInfo().lookupModule(MNameII->getName()); + if (!M) { + PP.Diag(MName, diag::warn_pragma_debug_unable_to_find_module) + << MNameII->getName(); + return; + } + M->dump(); } else if (II->isStr("overflow_stack")) { if (!PP.getPreprocessorOpts().DisablePragmaDebugCrash) DebugOverflowStack(); diff --git a/clang/lib/Parse/ParseCXXInlineMethods.cpp b/clang/lib/Parse/ParseCXXInlineMethods.cpp index 6c01af55ef3c4..b1064eb02b907 100644 --- a/clang/lib/Parse/ParseCXXInlineMethods.cpp +++ b/clang/lib/Parse/ParseCXXInlineMethods.cpp @@ -17,6 +17,7 @@ #include "clang/Sema/DeclSpec.h" #include "clang/Sema/EnterExpressionEvaluationContext.h" #include "clang/Sema/Scope.h" +#include "llvm/ADT/ScopeExit.h" using namespace clang; @@ -624,14 +625,21 @@ void 
Parser::ParseLexedMethodDef(LexedMethod &LM) { Actions.ActOnStartOfFunctionDef(getCurScope(), LM.D); - if (Tok.is(tok::kw_try)) { - ParseFunctionTryBlock(LM.D, FnScope); - + auto _ = llvm::make_scope_exit([&]() { while (Tok.isNot(tok::eof)) ConsumeAnyToken(); if (Tok.is(tok::eof) && Tok.getEofData() == LM.D) ConsumeAnyToken(); + + if (auto *FD = dyn_cast_or_null(LM.D)) + if (isa(FD) || + FD->isInIdentifierNamespace(Decl::IDNS_OrdinaryFriend)) + Actions.ActOnFinishInlineFunctionDef(FD); + }); + + if (Tok.is(tok::kw_try)) { + ParseFunctionTryBlock(LM.D, FnScope); return; } if (Tok.is(tok::colon)) { @@ -641,12 +649,6 @@ void Parser::ParseLexedMethodDef(LexedMethod &LM) { if (!Tok.is(tok::l_brace)) { FnScope.Exit(); Actions.ActOnFinishFunctionBody(LM.D, nullptr); - - while (Tok.isNot(tok::eof)) - ConsumeAnyToken(); - - if (Tok.is(tok::eof) && Tok.getEofData() == LM.D) - ConsumeAnyToken(); return; } } else @@ -660,17 +662,6 @@ void Parser::ParseLexedMethodDef(LexedMethod &LM) { "current template being instantiated!"); ParseFunctionStatementBody(LM.D, FnScope); - - while (Tok.isNot(tok::eof)) - ConsumeAnyToken(); - - if (Tok.is(tok::eof) && Tok.getEofData() == LM.D) - ConsumeAnyToken(); - - if (auto *FD = dyn_cast_or_null(LM.D)) - if (isa(FD) || - FD->isInIdentifierNamespace(Decl::IDNS_OrdinaryFriend)) - Actions.ActOnFinishInlineFunctionDef(FD); } /// ParseLexedMemberInitializers - We finished parsing the member specification diff --git a/clang/lib/Parse/ParseOpenACC.cpp b/clang/lib/Parse/ParseOpenACC.cpp index d036971d2fc31..6ea17c97d6345 100644 --- a/clang/lib/Parse/ParseOpenACC.cpp +++ b/clang/lib/Parse/ParseOpenACC.cpp @@ -617,13 +617,18 @@ unsigned getOpenACCScopeFlags(OpenACCDirectiveKind DirKind) { case OpenACCDirectiveKind::Wait: case OpenACCDirectiveKind::Init: case OpenACCDirectiveKind::Shutdown: + case OpenACCDirectiveKind::Cache: + case OpenACCDirectiveKind::Loop: + case OpenACCDirectiveKind::Atomic: + case OpenACCDirectiveKind::Declare: + case 
OpenACCDirectiveKind::Routine: + case OpenACCDirectiveKind::Set: + case OpenACCDirectiveKind::Update: return 0; case OpenACCDirectiveKind::Invalid: llvm_unreachable("Shouldn't be creating a scope for an invalid construct"); - default: - break; } - return 0; + llvm_unreachable("Shouldn't be creating a scope for an invalid construct"); } } // namespace @@ -981,7 +986,7 @@ Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams( case OpenACCClauseKind::PresentOrCopyIn: { bool IsReadOnly = tryParseAndConsumeSpecialTokenKind( *this, OpenACCSpecialTokenKind::ReadOnly, ClauseKind); - ParsedClause.setVarListDetails(ParseOpenACCVarList(ClauseKind), + ParsedClause.setVarListDetails(ParseOpenACCVarList(DirKind, ClauseKind), IsReadOnly, /*IsZero=*/false); break; @@ -994,7 +999,7 @@ Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams( case OpenACCClauseKind::PresentOrCopyOut: { bool IsZero = tryParseAndConsumeSpecialTokenKind( *this, OpenACCSpecialTokenKind::Zero, ClauseKind); - ParsedClause.setVarListDetails(ParseOpenACCVarList(ClauseKind), + ParsedClause.setVarListDetails(ParseOpenACCVarList(DirKind, ClauseKind), /*IsReadOnly=*/false, IsZero); break; } @@ -1002,7 +1007,8 @@ Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams( // If we're missing a clause-kind (or it is invalid), see if we can parse // the var-list anyway. 
OpenACCReductionOperator Op = ParseReductionOperator(*this); - ParsedClause.setReductionDetails(Op, ParseOpenACCVarList(ClauseKind)); + ParsedClause.setReductionDetails( + Op, ParseOpenACCVarList(DirKind, ClauseKind)); break; } case OpenACCClauseKind::Self: @@ -1013,21 +1019,13 @@ Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams( [[fallthrough]]; case OpenACCClauseKind::Device: case OpenACCClauseKind::Host: - ParsedClause.setVarListDetails(ParseOpenACCVarList(ClauseKind), - /*IsReadOnly=*/false, /*IsZero=*/false); - break; case OpenACCClauseKind::DeviceResident: case OpenACCClauseKind::Link: - ParseOpenACCVarList(ClauseKind); - break; case OpenACCClauseKind::Attach: case OpenACCClauseKind::Delete: case OpenACCClauseKind::Detach: case OpenACCClauseKind::DevicePtr: case OpenACCClauseKind::UseDevice: - ParsedClause.setVarListDetails(ParseOpenACCVarList(ClauseKind), - /*IsReadOnly=*/false, /*IsZero=*/false); - break; case OpenACCClauseKind::Copy: case OpenACCClauseKind::PCopy: case OpenACCClauseKind::PresentOrCopy: @@ -1035,7 +1033,7 @@ Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams( case OpenACCClauseKind::NoCreate: case OpenACCClauseKind::Present: case OpenACCClauseKind::Private: - ParsedClause.setVarListDetails(ParseOpenACCVarList(ClauseKind), + ParsedClause.setVarListDetails(ParseOpenACCVarList(DirKind, ClauseKind), /*IsReadOnly=*/false, /*IsZero=*/false); break; case OpenACCClauseKind::Collapse: { @@ -1362,7 +1360,8 @@ ExprResult Parser::ParseOpenACCBindClauseArgument() { /// - an array element /// - a member of a composite variable /// - a common block name between slashes (fortran only) -Parser::OpenACCVarParseResult Parser::ParseOpenACCVar(OpenACCClauseKind CK) { +Parser::OpenACCVarParseResult Parser::ParseOpenACCVar(OpenACCDirectiveKind DK, + OpenACCClauseKind CK) { OpenACCArraySectionRAII ArraySections(*this); ExprResult Res = ParseAssignmentExpression(); @@ -1373,15 +1372,16 @@ Parser::OpenACCVarParseResult 
Parser::ParseOpenACCVar(OpenACCClauseKind CK) { if (!Res.isUsable()) return {Res, OpenACCParseCanContinue::Can}; - Res = getActions().OpenACC().ActOnVar(CK, Res.get()); + Res = getActions().OpenACC().ActOnVar(DK, CK, Res.get()); return {Res, OpenACCParseCanContinue::Can}; } -llvm::SmallVector Parser::ParseOpenACCVarList(OpenACCClauseKind CK) { +llvm::SmallVector Parser::ParseOpenACCVarList(OpenACCDirectiveKind DK, + OpenACCClauseKind CK) { llvm::SmallVector Vars; - auto [Res, CanContinue] = ParseOpenACCVar(CK); + auto [Res, CanContinue] = ParseOpenACCVar(DK, CK); if (Res.isUsable()) { Vars.push_back(Res.get()); } else if (CanContinue == OpenACCParseCanContinue::Cannot) { @@ -1392,7 +1392,7 @@ llvm::SmallVector Parser::ParseOpenACCVarList(OpenACCClauseKind CK) { while (!getCurToken().isOneOf(tok::r_paren, tok::annot_pragma_openacc_end)) { ExpectAndConsume(tok::comma); - auto [Res, CanContinue] = ParseOpenACCVar(CK); + auto [Res, CanContinue] = ParseOpenACCVar(DK, CK); if (Res.isUsable()) { Vars.push_back(Res.get()); @@ -1408,25 +1408,29 @@ llvm::SmallVector Parser::ParseOpenACCVarList(OpenACCClauseKind CK) { /// In C and C++, the syntax of the cache directive is: /// /// #pragma acc cache ([readonly:]var-list) new-line -void Parser::ParseOpenACCCacheVarList() { +Parser::OpenACCCacheParseInfo Parser::ParseOpenACCCacheVarList() { // If this is the end of the line, just return 'false' and count on the close // paren diagnostic to catch the issue. if (getCurToken().isAnnotation()) - return; + return {}; + + OpenACCCacheParseInfo CacheInfo; + SourceLocation ReadOnlyLoc = getCurToken().getLocation(); // The VarList is an optional `readonly:` followed by a list of a variable // specifications. Consume something that looks like a 'tag', and diagnose if // it isn't 'readonly'. 
if (tryParseAndConsumeSpecialTokenKind(*this, OpenACCSpecialTokenKind::ReadOnly, - OpenACCDirectiveKind::Cache)) { - // FIXME: Record that this is a 'readonly' so that we can use that during - // Sema/AST generation. - } + OpenACCDirectiveKind::Cache)) + CacheInfo.ReadOnlyLoc = ReadOnlyLoc; // ParseOpenACCVarList should leave us before a r-paren, so no need to skip // anything here. - ParseOpenACCVarList(OpenACCClauseKind::Invalid); + CacheInfo.Vars = ParseOpenACCVarList(OpenACCDirectiveKind::Cache, + OpenACCClauseKind::Invalid); + + return CacheInfo; } Parser::OpenACCDirectiveParseInfo @@ -1435,7 +1439,9 @@ Parser::ParseOpenACCDirective() { SourceLocation DirLoc = getCurToken().getLocation(); OpenACCDirectiveKind DirKind = ParseOpenACCDirectiveKind(*this); Parser::OpenACCWaitParseInfo WaitInfo; + Parser::OpenACCCacheParseInfo CacheInfo; OpenACCAtomicKind AtomicKind = OpenACCAtomicKind::None; + ExprResult RoutineName; getActions().OpenACC().ActOnConstruct(DirKind, DirLoc); @@ -1459,17 +1465,20 @@ Parser::ParseOpenACCDirective() { case OpenACCDirectiveKind::Routine: { // Routine has an optional paren-wrapped name of a function in the local // scope. We parse the name, emitting any diagnostics - ExprResult RoutineName = ParseOpenACCIDExpression(); + RoutineName = ParseOpenACCIDExpression(); // If the routine name is invalid, just skip until the closing paren to // recover more gracefully. - if (RoutineName.isInvalid()) + if (!RoutineName.isUsable()) { T.skipToEnd(); - else + } else { T.consumeClose(); + RoutineName = + getActions().OpenACC().ActOnRoutineName(RoutineName.get()); + } break; } case OpenACCDirectiveKind::Cache: - ParseOpenACCCacheVarList(); + CacheInfo = ParseOpenACCCacheVarList(); // The ParseOpenACCCacheVarList function manages to recover from failures, // so we can always consume the close. 
T.consumeClose(); @@ -1497,10 +1506,21 @@ Parser::ParseOpenACCDirective() { T.getOpenLocation(), T.getCloseLocation(), /*EndLoc=*/SourceLocation{}, - WaitInfo.QueuesLoc, + (DirKind == OpenACCDirectiveKind::Wait + ? WaitInfo.QueuesLoc + : CacheInfo.ReadOnlyLoc), AtomicKind, - WaitInfo.getAllExprs(), - ParseOpenACCClauseList(DirKind)}; + {}, + {}}; + + if (DirKind == OpenACCDirectiveKind::Wait) + ParseInfo.Exprs = WaitInfo.getAllExprs(); + else if (DirKind == OpenACCDirectiveKind::Cache) + ParseInfo.Exprs = std::move(CacheInfo.Vars); + else if (DirKind == OpenACCDirectiveKind::Routine && RoutineName.isUsable()) + ParseInfo.Exprs = llvm::SmallVector(1, RoutineName.get()); + + ParseInfo.Clauses = ParseOpenACCClauseList(DirKind); assert(Tok.is(tok::annot_pragma_openacc_end) && "Didn't parse all OpenACC Clauses"); @@ -1519,12 +1539,14 @@ Parser::DeclGroupPtrTy Parser::ParseOpenACCDirectiveDecl() { OpenACCDirectiveParseInfo DirInfo = ParseOpenACCDirective(); - if (getActions().OpenACC().ActOnStartDeclDirective(DirInfo.DirKind, - DirInfo.StartLoc)) + if (getActions().OpenACC().ActOnStartDeclDirective( + DirInfo.DirKind, DirInfo.StartLoc, DirInfo.Clauses)) return nullptr; - // TODO OpenACC: Do whatever decl parsing is required here. - return DeclGroupPtrTy::make(getActions().OpenACC().ActOnEndDeclDirective()); + return DeclGroupPtrTy::make(getActions().OpenACC().ActOnEndDeclDirective( + DirInfo.DirKind, DirInfo.StartLoc, DirInfo.DirLoc, DirInfo.LParenLoc, + DirInfo.Exprs.empty() ? nullptr : DirInfo.Exprs.front(), + DirInfo.RParenLoc, DirInfo.EndLoc, DirInfo.Clauses)); } // Parse OpenACC Directive on a Statement. 
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 42e6aac681c1c..b791c5d5e3019 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -2883,6 +2883,15 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( /*ReadDirectiveWithinMetadirective=*/true); break; } + // If no match is found and no otherwise clause is present, skip + // OMP5.2 Chapter 7.4: If no otherwise clause is specified the effect is as + // if one was specified without an associated directive variant. + if (BestIdx == -1 && Idx == 1) { + assert(Tok.is(tok::annot_pragma_openmp_end) && + "Expecting the end of the pragma here"); + ConsumeAnnotationToken(); + return StmtEmpty(); + } break; } case OMPD_threadprivate: { diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index c699e92985156..b3fba097999f5 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -1798,6 +1798,47 @@ class DeferredDiagnosticsEmitter Inherited::visitUsedDecl(Loc, D); } + // Visitor member and parent dtors called by this dtor. 
+ void VisitCalledDestructors(CXXDestructorDecl *DD) { + const CXXRecordDecl *RD = DD->getParent(); + + // Visit the dtors of all members + for (const FieldDecl *FD : RD->fields()) { + QualType FT = FD->getType(); + if (const auto *RT = FT->getAs()) + if (const auto *ClassDecl = dyn_cast(RT->getDecl())) + if (ClassDecl->hasDefinition()) + if (CXXDestructorDecl *MemberDtor = ClassDecl->getDestructor()) + asImpl().visitUsedDecl(MemberDtor->getLocation(), MemberDtor); + } + + // Also visit base class dtors + for (const auto &Base : RD->bases()) { + QualType BaseType = Base.getType(); + if (const auto *RT = BaseType->getAs()) + if (const auto *BaseDecl = dyn_cast(RT->getDecl())) + if (BaseDecl->hasDefinition()) + if (CXXDestructorDecl *BaseDtor = BaseDecl->getDestructor()) + asImpl().visitUsedDecl(BaseDtor->getLocation(), BaseDtor); + } + } + + void VisitDeclStmt(DeclStmt *DS) { + // Visit dtors called by variables that need destruction + for (auto *D : DS->decls()) + if (auto *VD = dyn_cast(D)) + if (VD->isThisDeclarationADefinition() && + VD->needsDestruction(S.Context)) { + QualType VT = VD->getType(); + if (const auto *RT = VT->getAs()) + if (const auto *ClassDecl = dyn_cast(RT->getDecl())) + if (ClassDecl->hasDefinition()) + if (CXXDestructorDecl *Dtor = ClassDecl->getDestructor()) + asImpl().visitUsedDecl(Dtor->getLocation(), Dtor); + } + + Inherited::VisitDeclStmt(DS); + } void checkVar(VarDecl *VD) { assert(VD->isFileVarDecl() && "Should only check file-scope variables"); @@ -1839,6 +1880,8 @@ class DeferredDiagnosticsEmitter if (auto *S = FD->getBody()) { this->Visit(S); } + if (CXXDestructorDecl *Dtor = dyn_cast(FD)) + asImpl().VisitCalledDestructors(Dtor); UsePath.pop_back(); InUsePath.erase(FD); } diff --git a/clang/lib/Sema/SemaBase.cpp b/clang/lib/Sema/SemaBase.cpp index 5c24f21b469b0..85c4a0ab40fed 100644 --- a/clang/lib/Sema/SemaBase.cpp +++ b/clang/lib/Sema/SemaBase.cpp @@ -9,6 +9,7 @@ SemaBase::SemaBase(Sema &S) : SemaRef(S) {} ASTContext 
&SemaBase::getASTContext() const { return SemaRef.Context; } DiagnosticsEngine &SemaBase::getDiagnostics() const { return SemaRef.Diags; } const LangOptions &SemaBase::getLangOpts() const { return SemaRef.LangOpts; } +DeclContext *SemaBase::getCurContext() const { return SemaRef.CurContext; } SemaBase::ImmediateDiagBuilder::~ImmediateDiagBuilder() { // If we aren't active, there is nothing to do. diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index 0e1bf727d72d2..0e5fc5e1a40b4 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -372,6 +372,21 @@ bool SemaCUDA::inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, CXXMethodDecl *MemberDecl, bool ConstRHS, bool Diagnose) { + // If MemberDecl is virtual destructor of an explicit template class + // instantiation, it must be emitted, therefore it needs to be inferred + // conservatively by ignoring implicit host/device attrs of member and parent + // dtors called by it. Also, it needs to be checked by deferred diag visitor. + bool IsExpVDtor = false; + if (isa(MemberDecl) && MemberDecl->isVirtual()) { + if (auto *Spec = dyn_cast(ClassDecl)) { + TemplateSpecializationKind TSK = Spec->getTemplateSpecializationKind(); + IsExpVDtor = TSK == TSK_ExplicitInstantiationDeclaration || + TSK == TSK_ExplicitInstantiationDefinition; + } + } + if (IsExpVDtor) + SemaRef.DeclsToCheckForDeferredDiags.insert(MemberDecl); + // If the defaulted special member is defined lexically outside of its // owning class, or the special member already has explicit device or host // attributes, do not infer. 
@@ -422,7 +437,9 @@ bool SemaCUDA::inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, if (!SMOR.getMethod()) continue; - CUDAFunctionTarget BaseMethodTarget = IdentifyTarget(SMOR.getMethod()); + CUDAFunctionTarget BaseMethodTarget = + IdentifyTarget(SMOR.getMethod(), IsExpVDtor); + if (!InferredTarget) { InferredTarget = BaseMethodTarget; } else { @@ -466,7 +483,9 @@ bool SemaCUDA::inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, if (!SMOR.getMethod()) continue; - CUDAFunctionTarget FieldMethodTarget = IdentifyTarget(SMOR.getMethod()); + CUDAFunctionTarget FieldMethodTarget = + IdentifyTarget(SMOR.getMethod(), IsExpVDtor); + if (!InferredTarget) { InferredTarget = FieldMethodTarget; } else { diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index f9926c6b4adab..9cac9cf5c4df7 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -6149,18 +6149,19 @@ static StringLiteralCheckType checkFormatStringExpr( if (!Sema::getFormatStringInfo(D, PVFormat->getFormatIdx(), PVFormat->getFirstArg(), &CallerFSI)) continue; - // We also check if the formats are compatible. - // We can't pass a 'scanf' string to a 'printf' function. - if (Type != S.GetFormatStringType(PVFormat)) { - S.Diag(Args[format_idx]->getBeginLoc(), - diag::warn_format_string_type_incompatible) - << PVFormat->getType()->getName() - << S.GetFormatStringTypeName(Type); - if (!InFunctionCall) { - S.Diag(E->getBeginLoc(), diag::note_format_string_defined); + if (PV->getFunctionScopeIndex() == CallerFSI.FormatIdx) { + // We also check if the formats are compatible. + // We can't pass a 'scanf' string to a 'printf' function. 
+ if (Type != S.GetFormatStringType(PVFormat)) { + S.Diag(Args[format_idx]->getBeginLoc(), + diag::warn_format_string_type_incompatible) + << PVFormat->getType()->getName() + << S.GetFormatStringTypeName(Type); + if (!InFunctionCall) { + S.Diag(E->getBeginLoc(), diag::note_format_string_defined); + } + return SLCT_UncheckedLiteral; } - return SLCT_UncheckedLiteral; - } else if (PV->getFunctionScopeIndex() == CallerFSI.FormatIdx) { // Lastly, check that argument passing kinds transition in a // way that makes sense: // from a caller with FAPK_VAList, allow FAPK_VAList diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index 80ae87e7c5725..db467d76b5d32 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -6229,8 +6229,8 @@ static void mergeCandidatesWithResults( // Sort the overload candidate set by placing the best overloads first. llvm::stable_sort(CandidateSet, [&](const OverloadCandidate &X, const OverloadCandidate &Y) { - return isBetterOverloadCandidate(SemaRef, X, Y, Loc, - CandidateSet.getKind()); + return isBetterOverloadCandidate(SemaRef, X, Y, Loc, CandidateSet.getKind(), + /*PartialOverloading=*/true); }); // Add the remaining viable overload candidates as code-completion results. 
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 285bd27a35a76..714210c3856d7 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -50,6 +50,7 @@ #include "clang/Sema/SemaHLSL.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/SemaObjC.h" +#include "clang/Sema/SemaOpenACC.h" #include "clang/Sema/SemaOpenMP.h" #include "clang/Sema/SemaPPC.h" #include "clang/Sema/SemaRISCV.h" @@ -60,6 +61,7 @@ #include "llvm/ADT/STLForwardCompat.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/SaveAndRestore.h" #include "llvm/TargetParser/Triple.h" #include #include @@ -137,6 +139,26 @@ class TypeNameValidatorCCC final : public CorrectionCandidateCallback { } // end anonymous namespace +QualType Sema::getTypeDeclType(DeclContext *LookupCtx, DiagCtorKind DCK, + TypeDecl *TD, SourceLocation NameLoc) { + auto *LookupRD = dyn_cast_or_null(LookupCtx); + auto *FoundRD = dyn_cast(TD); + if (DCK != DiagCtorKind::None && LookupRD && FoundRD && + FoundRD->isInjectedClassName() && + declaresSameEntity(LookupRD, cast(FoundRD->getParent()))) { + Diag(NameLoc, + DCK == DiagCtorKind::Typename + ? diag::ext_out_of_line_qualified_id_type_names_constructor + : diag::err_out_of_line_qualified_id_type_names_constructor) + << TD->getIdentifier() << /*Type=*/1 + << 0 /*if any keyword was present, it was 'typename'*/; + } + + DiagnoseUseOfDecl(TD, NameLoc); + MarkAnyDeclReferenced(TD->getLocation(), TD, /*OdrUse=*/false); + return Context.getTypeDeclType(TD); +} + namespace { enum class UnqualifiedTypeNameLookupResult { NotFound, @@ -293,10 +315,11 @@ ParsedType Sema::getTypeName(const IdentifierInfo &II, SourceLocation NameLoc, bool IsClassTemplateDeductionContext, ImplicitTypenameContext AllowImplicitTypename, IdentifierInfo **CorrectedII) { + bool IsImplicitTypename = !isClassName && !IsCtorOrDtorName; // FIXME: Consider allowing this outside C++1z mode as an extension. 
bool AllowDeducedTemplate = IsClassTemplateDeductionContext && - getLangOpts().CPlusPlus17 && !IsCtorOrDtorName && - !isClassName && !HasTrailingDot; + getLangOpts().CPlusPlus17 && IsImplicitTypename && + !HasTrailingDot; // Determine where we will perform name lookup. DeclContext *LookupCtx = nullptr; @@ -320,11 +343,9 @@ ParsedType Sema::getTypeName(const IdentifierInfo &II, SourceLocation NameLoc, // refer to a member of an unknown specialization. // In C++2a, in several contexts a 'typename' is not required. Also // allow this as an extension. - if (AllowImplicitTypename == ImplicitTypenameContext::No && - !isClassName && !IsCtorOrDtorName) - return nullptr; - bool IsImplicitTypename = !isClassName && !IsCtorOrDtorName; if (IsImplicitTypename) { + if (AllowImplicitTypename == ImplicitTypenameContext::No) + return nullptr; SourceLocation QualifiedLoc = SS->getRange().getBegin(); if (getLangOpts().CPlusPlus20) Diag(QualifiedLoc, diag::warn_cxx17_compat_implicit_typename); @@ -513,18 +534,10 @@ ParsedType Sema::getTypeName(const IdentifierInfo &II, SourceLocation NameLoc, // C++ [class.qual]p2: A lookup that would find the injected-class-name // instead names the constructors of the class, except when naming a class. // This is ill-formed when we're not actually forming a ctor or dtor name. - auto *LookupRD = dyn_cast_or_null(LookupCtx); - auto *FoundRD = dyn_cast(TD); - if (!isClassName && !IsCtorOrDtorName && LookupRD && FoundRD && - FoundRD->isInjectedClassName() && - declaresSameEntity(LookupRD, cast(FoundRD->getParent()))) - Diag(NameLoc, diag::err_out_of_line_qualified_id_type_names_constructor) - << &II << /*Type*/1; - - DiagnoseUseOfDecl(IIDecl, NameLoc); - - T = Context.getTypeDeclType(TD); - MarkAnyDeclReferenced(TD->getLocation(), TD, /*OdrUse=*/false); + T = getTypeDeclType(LookupCtx, + IsImplicitTypename ? 
DiagCtorKind::Implicit + : DiagCtorKind::None, + TD, NameLoc); } else if (ObjCInterfaceDecl *IDecl = dyn_cast(IIDecl)) { (void)DiagnoseUseOfDecl(IDecl, NameLoc); if (!HasTrailingDot) @@ -7966,6 +7979,8 @@ NamedDecl *Sema::ActOnVariableDeclarator( if (getLangOpts().HLSL) HLSL().ActOnVariableDeclarator(NewVD); + if (getLangOpts().OpenACC) + OpenACC().ActOnVariableDeclarator(NewVD); // FIXME: This is probably the wrong location to be doing this and we should // probably be doing this for more attributes (especially for function @@ -12597,6 +12612,7 @@ namespace { bool isRecordType; bool isPODType; bool isReferenceType; + bool isInCXXOperatorCall; bool isInitList; llvm::SmallVector InitFieldIndex; @@ -12609,6 +12625,7 @@ namespace { isPODType = false; isRecordType = false; isReferenceType = false; + isInCXXOperatorCall = false; isInitList = false; if (ValueDecl *VD = dyn_cast(OrigDecl)) { isPODType = VD->getType().isPODType(S.Context); @@ -12796,6 +12813,7 @@ namespace { } void VisitCXXOperatorCallExpr(CXXOperatorCallExpr *E) { + llvm::SaveAndRestore CxxOpCallScope(isInCXXOperatorCall, true); Expr *Callee = E->getCallee(); if (isa(Callee)) @@ -12806,6 +12824,19 @@ namespace { HandleValue(Arg->IgnoreParenImpCasts()); } + void VisitLambdaExpr(LambdaExpr *E) { + if (!isInCXXOperatorCall) { + Inherited::VisitLambdaExpr(E); + return; + } + + for (Expr *Init : E->capture_inits()) + if (DeclRefExpr *DRE = dyn_cast_if_present(Init)) + HandleDeclRefExpr(DRE); + else if (Init) + Visit(Init); + } + void VisitUnaryOperator(UnaryOperator *E) { // For POD record types, addresses of its own members are well-defined. if (E->getOpcode() == UO_AddrOf && isRecordType && @@ -13427,9 +13458,13 @@ bool Sema::GloballyUniqueObjectMightBeAccidentallyDuplicated( FunDcl->getTemplateSpecializationKind() != TSK_Undeclared; } - // Non-inline functions/variables can only legally appear in one TU, - // unless they were part of a template. 
- if (!TargetIsInline && !TargetWasTemplated) + // Non-inline functions/variables can only legally appear in one TU + // unless they were part of a template. Unfortunately, making complex + // template instantiations visible is infeasible in practice, since + // everything the template depends on also has to be visible. To avoid + // giving impractical-to-fix warnings, don't warn if we're inside + // something that was templated, even on inline stuff. + if (!TargetIsInline || TargetWasTemplated) return false; // If the object isn't hidden, the dynamic linker will prevent duplication. @@ -13469,8 +13504,8 @@ void Sema::DiagnoseUniqueObjectDuplication(const VarDecl *VD) { // FIXME: Windows uses dllexport/dllimport instead of visibility, and we don't // handle that yet. Disable the warning on Windows for now. - // Don't diagnose if we're inside a template; - // we'll diagnose during instantiation instead. + // Don't diagnose if we're inside a template, because it's not practical to + // fix the warning in most cases. if (!Context.getTargetInfo().shouldDLLImportComdatSymbols() && !VD->isTemplated() && GloballyUniqueObjectMightBeAccidentallyDuplicated(VD)) { @@ -20469,6 +20504,21 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(const FunctionDecl *FD, if (IsEmittedForExternalSymbol()) return FunctionEmissionStatus::Emitted; + + // If FD is a virtual destructor of an explicit instantiation + // of a template class, return Emitted. 
+ if (auto *Destructor = dyn_cast(FD)) { + if (Destructor->isVirtual()) { + if (auto *Spec = dyn_cast( + Destructor->getParent())) { + TemplateSpecializationKind TSK = + Spec->getTemplateSpecializationKind(); + if (TSK == TSK_ExplicitInstantiationDeclaration || + TSK == TSK_ExplicitInstantiationDefinition) + return FunctionEmissionStatus::Emitted; + } + } + } } // Otherwise, the function is known-emitted if it's in our set of diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 942259b57c88b..c1e97aa2dde5b 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -2138,6 +2138,8 @@ static void handleConstructorAttr(Sema &S, Decl *D, const ParsedAttr &AL) { if (AL.getNumArgs() && !S.checkUInt32Argument(AL, AL.getArgAsExpr(0), priority)) return; + S.Diag(D->getLocation(), diag::warn_global_constructor) + << D->getSourceRange(); D->addAttr(::new (S.Context) ConstructorAttr(S.Context, AL, priority)); } @@ -2147,6 +2149,7 @@ static void handleDestructorAttr(Sema &S, Decl *D, const ParsedAttr &AL) { if (AL.getNumArgs() && !S.checkUInt32Argument(AL, AL.getArgAsExpr(0), priority)) return; + S.Diag(D->getLocation(), diag::warn_global_destructor) << D->getSourceRange(); D->addAttr(::new (S.Context) DestructorAttr(S.Context, AL, priority)); } @@ -3720,16 +3723,18 @@ static void handleInitPriorityAttr(Sema &S, Decl *D, const ParsedAttr &AL) { return; } - // Only perform the priority check if the attribute is outside of a system - // header. Values <= 100 are reserved for the implementation, and libc++ - // benefits from being able to specify values in that range. 
- if ((prioritynum < 101 || prioritynum > 65535) && - !S.getSourceManager().isInSystemHeader(AL.getLoc())) { + if (prioritynum > 65535) { S.Diag(AL.getLoc(), diag::err_attribute_argument_out_of_range) - << E->getSourceRange() << AL << 101 << 65535; + << E->getSourceRange() << AL << 0 << 65535; AL.setInvalid(); return; } + + // Values <= 100 are reserved for the implementation, and libc++ + // benefits from being able to specify values in that range. + if (prioritynum < 101) + S.Diag(AL.getLoc(), diag::warn_init_priority_reserved) + << E->getSourceRange() << prioritynum; D->addAttr(::new (S.Context) InitPriorityAttr(S.Context, AL, prioritynum)); } @@ -5206,6 +5211,25 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) { case ParsedAttr::AT_RISCVVectorCC: D->addAttr(::new (S.Context) RISCVVectorCCAttr(S.Context, AL)); return; + case ParsedAttr::AT_RISCVVLSCC: { + // If the riscv_abi_vlen doesn't have any argument, default ABI_VLEN is 128. + unsigned VectorLength = 128; + if (AL.getNumArgs() && + !S.checkUInt32Argument(AL, AL.getArgAsExpr(0), VectorLength)) + return; + if (VectorLength < 32 || VectorLength > 65536) { + S.Diag(AL.getLoc(), diag::err_argument_invalid_range) + << VectorLength << 32 << 65536; + return; + } + if (!llvm::isPowerOf2_64(VectorLength)) { + S.Diag(AL.getLoc(), diag::err_argument_not_power_of_2); + return; + } + + D->addAttr(::new (S.Context) RISCVVLSCCAttr(S.Context, AL, VectorLength)); + return; + } default: llvm_unreachable("unexpected attribute kind"); } @@ -5325,10 +5349,19 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC, return false; } - unsigned ReqArgs = Attrs.getKind() == ParsedAttr::AT_Pcs ? 1 : 0; - if (!Attrs.checkExactlyNumArgs(*this, ReqArgs)) { - Attrs.setInvalid(); - return true; + if (Attrs.getKind() == ParsedAttr::AT_RISCVVLSCC) { + // riscv_vls_cc only accepts 0 or 1 argument. 
+ if (!Attrs.checkAtLeastNumArgs(*this, 0) || + !Attrs.checkAtMostNumArgs(*this, 1)) { + Attrs.setInvalid(); + return true; + } + } else { + unsigned ReqArgs = Attrs.getKind() == ParsedAttr::AT_Pcs ? 1 : 0; + if (!Attrs.checkExactlyNumArgs(*this, ReqArgs)) { + Attrs.setInvalid(); + return true; + } } // TODO: diagnose uses of these conventions on the wrong target. @@ -5413,6 +5446,30 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC, case ParsedAttr::AT_RISCVVectorCC: CC = CC_RISCVVectorCall; break; + case ParsedAttr::AT_RISCVVLSCC: { + // If the riscv_abi_vlen doesn't have any argument, we set it to the default + // value 128. + unsigned ABIVLen = 128; + if (Attrs.getNumArgs() && + !checkUInt32Argument(Attrs, Attrs.getArgAsExpr(0), ABIVLen)) { + Attrs.setInvalid(); + return true; + } + if (Attrs.getNumArgs() && (ABIVLen < 32 || ABIVLen > 65536)) { + Attrs.setInvalid(); + Diag(Attrs.getLoc(), diag::err_argument_invalid_range) + << ABIVLen << 32 << 65536; + return true; + } + if (!llvm::isPowerOf2_64(ABIVLen)) { + Attrs.setInvalid(); + Diag(Attrs.getLoc(), diag::err_argument_not_power_of_2); + return true; + } + CC = static_cast(CallingConv::CC_RISCVVLSCall_32 + + llvm::Log2_64(ABIVLen) - 5); + break; + } default: llvm_unreachable("unexpected attribute kind"); } @@ -7271,6 +7328,7 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_M68kRTD: case ParsedAttr::AT_PreserveNone: case ParsedAttr::AT_RISCVVectorCC: + case ParsedAttr::AT_RISCVVLSCC: handleCallConvAttr(S, D, AL); break; case ParsedAttr::AT_Suppress: diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 664d48ccbc382..fd5f0443fa894 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -733,8 +733,11 @@ Sema::ActOnDecompositionDeclarator(Scope *S, Declarator &D, } if (!TemplateParamLists.empty()) { - // FIXME: There's no rule against this, but there are also no rules that - // 
would actually make it usable, so we reject it for now. + // C++17 [temp]/1: + // A template defines a family of class, functions, or variables, or an + // alias for a family of types. + // + // Structured bindings are not included. Diag(TemplateParamLists.front()->getTemplateLoc(), diag::err_decomp_decl_template); return nullptr; @@ -883,8 +886,7 @@ Sema::ActOnDecompositionDeclarator(Scope *S, Declarator &D, // It's not permitted to shadow a template parameter name. if (Previous.isSingleResult() && Previous.getFoundDecl()->isTemplateParameter()) { - DiagnoseTemplateParameterShadow(D.getIdentifierLoc(), - Previous.getFoundDecl()); + DiagnoseTemplateParameterShadow(B.NameLoc, Previous.getFoundDecl()); Previous.clear(); } diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index a8eb24133a76d..f358d8342e2f3 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1407,6 +1407,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Stmt::OpenACCEnterDataConstructClass: case Stmt::OpenACCExitDataConstructClass: case Stmt::OpenACCWaitConstructClass: + case Stmt::OpenACCCacheConstructClass: case Stmt::OpenACCInitConstructClass: case Stmt::OpenACCShutdownConstructClass: case Stmt::OpenACCSetConstructClass: diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 1738663327453..f896ccab53a54 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -11247,6 +11247,10 @@ static void DiagnoseBadShiftValues(Sema& S, ExprResult &LHS, ExprResult &RHS, if (S.getLangOpts().OpenCL) return; + if (Opc == BO_Shr && + LHS.get()->IgnoreParenImpCasts()->getType()->isBooleanType()) + S.Diag(Loc, diag::warn_shift_bool) << LHS.get()->getSourceRange(); + // Check right/shifter operand Expr::EvalResult RHSResult; if (RHS.get()->isValueDependent() || @@ -14924,7 +14928,8 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc, if (const auto *BI = dyn_cast(LHSExpr); BI && 
BI->isComparisonOp()) - Diag(OpLoc, diag::warn_consecutive_comparison); + Diag(OpLoc, diag::warn_consecutive_comparison) + << BI->getOpcodeStr() << BinaryOperator::getOpcodeStr(Opc); break; case BO_EQ: @@ -17978,10 +17983,9 @@ static bool isPotentiallyConstantEvaluatedContext(Sema &SemaRef) { /// Return true if this function has a calling convention that requires mangling /// in the size of the parameter pack. static bool funcHasParameterSizeMangling(Sema &S, FunctionDecl *FD) { - // These manglings don't do anything on non-Windows or non-x86 platforms, so - // we don't need parameter type sizes. - const llvm::Triple &TT = S.Context.getTargetInfo().getTriple(); - if (!TT.isOSWindows() || !TT.isX86()) + // These manglings are only applicable for targets which use the Microsoft + // mangling scheme for C. + if (!S.Context.getTargetInfo().shouldUseMicrosoftCCforMangling()) return false; // If this is C++ and this isn't an extern "C" function, parameters do not diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index ffc3ac1b65854..aff349a932eec 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2086,6 +2086,18 @@ static bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) { checkAllFloatTypes); } +static bool CheckUnsignedIntRepresentations(Sema *S, CallExpr *TheCall) { + auto checkUnsignedInteger = [](clang::QualType PassedType) -> bool { + clang::QualType BaseType = + PassedType->isVectorType() + ? 
PassedType->getAs()->getElementType() + : PassedType; + return !BaseType->isUnsignedIntegerType(); + }; + return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.UnsignedIntTy, + checkUnsignedInteger); +} + static bool CheckFloatOrHalfRepresentations(Sema *S, CallExpr *TheCall) { auto checkFloatorHalf = [](clang::QualType PassedType) -> bool { clang::QualType BaseType = @@ -2277,6 +2289,52 @@ static bool CheckResourceHandle( // returning an ExprError bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { switch (BuiltinID) { + case Builtin::BI__builtin_hlsl_adduint64: { + if (SemaRef.checkArgCount(TheCall, 2)) + return true; + if (CheckVectorElementCallArgs(&SemaRef, TheCall)) + return true; + if (CheckUnsignedIntRepresentations(&SemaRef, TheCall)) + return true; + + // CheckVectorElementCallArgs(...) guarantees both args are the same type. + assert(TheCall->getArg(0)->getType() == TheCall->getArg(1)->getType() && + "Both args must be of the same type"); + + // ensure both args are vectors + auto *VTy = TheCall->getArg(0)->getType()->getAs(); + if (!VTy) { + SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vec_builtin_non_vector) + << TheCall->getDirectCallee() << /*all*/ 1; + return true; + } + + // ensure arg integers are 32-bits + uint64_t ElementBitCount = getASTContext() + .getTypeSizeInChars(VTy->getElementType()) + .getQuantity() * + 8; + if (ElementBitCount != 32) { + SemaRef.Diag(TheCall->getBeginLoc(), + diag::err_integer_incorrect_bit_count) + << 32 << ElementBitCount; + return true; + } + + // ensure both args are vectors of total bit size of a multiple of 64 + int NumElementsArg = VTy->getNumElements(); + if (NumElementsArg != 2 && NumElementsArg != 4) { + SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vector_incorrect_bit_count) + << 1 /*a multiple of*/ << 64 << NumElementsArg * ElementBitCount; + return true; + } + + ExprResult A = TheCall->getArg(0); + QualType ArgTyA = A.get()->getType(); + // return type is the same as the 
input type + TheCall->setType(ArgTyA); + break; + } case Builtin::BI__builtin_hlsl_resource_getpointer: { if (SemaRef.checkArgCount(TheCall, 2) || CheckResourceHandle(&SemaRef, TheCall, 0) || @@ -2293,7 +2351,8 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { break; } - case Builtin::BI__builtin_hlsl_and: { + case Builtin::BI__builtin_hlsl_and: + case Builtin::BI__builtin_hlsl_or: { if (SemaRef.checkArgCount(TheCall, 2)) return true; if (CheckVectorElementCallArgs(&SemaRef, TheCall)) @@ -3064,6 +3123,16 @@ void SemaHLSL::ActOnVariableDeclarator(VarDecl *VD) { if (VD->getType()->isHLSLIntangibleType()) collectResourceBindingsOnVarDecl(VD); + const Type *VarType = VD->getType().getTypePtr(); + while (VarType->isArrayType()) + VarType = VarType->getArrayElementTypeNoTypeQual(); + if (VarType->isHLSLResourceRecord()) { + // Make the variable for resources static. The global externally visible + // storage is accessed through the handle, which is a member. The variable + // itself is not externally visible. + VD->setStorageClass(StorageClass::SC_Static); + } + // process explicit bindings processExplicitBindingsOnDecl(VD); } diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index 925af06894f72..edbd4c071b563 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -519,12 +519,13 @@ class InitListChecker { uint64_t ElsCount = 1; // Otherwise try to fill whole array with embed data. 
if (Entity.getKind() == InitializedEntity::EK_ArrayElement) { + unsigned ArrIndex = Entity.getElementIndex(); auto *AType = SemaRef.Context.getAsArrayType(Entity.getParent()->getType()); assert(AType && "expected array type when initializing array"); ElsCount = Embed->getDataElementCount(); if (const auto *CAType = dyn_cast(AType)) - ElsCount = std::min(CAType->getSize().getZExtValue(), + ElsCount = std::min(CAType->getSize().getZExtValue() - ArrIndex, ElsCount - CurEmbedIndex); if (ElsCount == Embed->getDataElementCount()) { CurEmbed = nullptr; @@ -1317,7 +1318,7 @@ void InitListChecker::CheckExplicitInitList(const InitializedEntity &Entity, return; // Don't complain for incomplete types, since we'll get an error elsewhere. - if (Index < IList->getNumInits() && !T->isIncompleteType()) { + if ((Index < IList->getNumInits() || CurEmbed) && !T->isIncompleteType()) { // We have leftover initializers bool ExtraInitsIsError = SemaRef.getLangOpts().CPlusPlus || (SemaRef.getLangOpts().OpenCL && T->isVectorType()); @@ -2180,6 +2181,7 @@ void InitListChecker::CheckArrayType(const InitializedEntity &Entity, InitializedEntity ElementEntity = InitializedEntity::InitializeElement( SemaRef.Context, StructuredIndex, Entity); + ElementEntity.setElementIndex(elementIndex.getExtValue()); unsigned EmbedElementIndexBeforeInit = CurEmbedIndex; // Check this element. 
@@ -4261,7 +4263,7 @@ static bool TryInitializerListConstruction(Sema &S, QualType ArrayType = S.Context.getConstantArrayType( E.withConst(), llvm::APInt(S.Context.getTypeSize(S.Context.getSizeType()), - List->getNumInits()), + List->getNumInitsWithEmbedExpanded()), nullptr, clang::ArraySizeModifier::Normal, 0); InitializedEntity HiddenArray = InitializedEntity::InitializeTemporary(ArrayType); @@ -7269,7 +7271,7 @@ static void CheckCXX98CompatAccessibleCopy(Sema &S, void InitializationSequence::PrintInitLocationNote(Sema &S, const InitializedEntity &Entity) { - if (Entity.isParamOrTemplateParamKind() && Entity.getDecl()) { + if (Entity.isParameterKind() && Entity.getDecl()) { if (Entity.getDecl()->getLocation().isInvalid()) return; @@ -7278,9 +7280,8 @@ void InitializationSequence::PrintInitLocationNote(Sema &S, << Entity.getDecl()->getDeclName(); else S.Diag(Entity.getDecl()->getLocation(), diag::note_parameter_here); - } - else if (Entity.getKind() == InitializedEntity::EK_RelatedResult && - Entity.getMethodDecl()) + } else if (Entity.getKind() == InitializedEntity::EK_RelatedResult && + Entity.getMethodDecl()) S.Diag(Entity.getMethodDecl()->getLocation(), diag::note_method_return_type_change) << Entity.getMethodDecl()->getDeclName(); diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp index ceb32ee15dfa3..4d278bbc67d28 100644 --- a/clang/lib/Sema/SemaLambda.cpp +++ b/clang/lib/Sema/SemaLambda.cpp @@ -1506,14 +1506,13 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro, TemplateParameterList *TemplateParams = getGenericLambdaTemplateParameterList(LSI, *this); if (TemplateParams) { - for (const auto *TP : TemplateParams->asArray()) { + for (auto *TP : TemplateParams->asArray()) { if (!TP->getIdentifier()) continue; + CheckTemplateParameterRAII CTP(*this, TP); for (const auto &Capture : Intro.Captures) { - if (Capture.Id == TP->getIdentifier()) { + if (Capture.Id == TP->getIdentifier()) Diag(Capture.Loc, 
diag::err_template_param_shadow) << Capture.Id; - NoteTemplateParameterLocation(*TP); - } } } } diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index aecf8ed1b4e4d..f3af514596a7e 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -1580,9 +1580,13 @@ llvm::DenseSet &Sema::getLookupModules() { unsigned N = CodeSynthesisContexts.size(); for (unsigned I = CodeSynthesisContextLookupModules.size(); I != N; ++I) { - Module *M = CodeSynthesisContexts[I].Entity ? - getDefiningModule(*this, CodeSynthesisContexts[I].Entity) : - nullptr; + auto &Ctx = CodeSynthesisContexts[I]; + // FIXME: Are there any other context kinds that shouldn't be looked at + // here? + if (Ctx.Kind == CodeSynthesisContext::PartialOrderingTTP || + Ctx.Kind == CodeSynthesisContext::CheckTemplateParameter) + continue; + Module *M = Ctx.Entity ? getDefiningModule(*this, Ctx.Entity) : nullptr; if (M && !LookupModulesCache.insert(M).second) M = nullptr; CodeSynthesisContextLookupModules.push_back(M); @@ -3703,7 +3707,8 @@ Sema::LookupLiteralOperator(Scope *S, LookupResult &R, TemplateParameterList *Params = FD->getTemplateParameters(); if (Params->size() == 1) { IsTemplate = true; - if (!Params->getParam(0)->isTemplateParameterPack() && !StringLit) { + NamedDecl *Param = Params->getParam(0); + if (!Param->isTemplateParameterPack() && !StringLit) { // Implied but not stated: user-defined integer and floating literals // only ever use numeric literal operator templates, not templates // taking a parameter of class type. 
@@ -3716,6 +3721,7 @@ Sema::LookupLiteralOperator(Scope *S, LookupResult &R, if (StringLit) { SFINAETrap Trap(*this); CheckTemplateArgumentInfo CTAI; + CheckTemplateParameterRAII CTP(*this, Param); TemplateArgumentLoc Arg(TemplateArgument(StringLit), StringLit); if (CheckTemplateArgument( Params->getParam(0), Arg, FD, R.getNameLoc(), R.getNameLoc(), diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index 2d2f8ddf4652b..4fe6bf5099a64 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -11,11 +11,12 @@ /// //===----------------------------------------------------------------------===// +#include "clang/Sema/SemaOpenACC.h" +#include "clang/AST/DeclOpenACC.h" #include "clang/AST/StmtOpenACC.h" #include "clang/Basic/DiagnosticSema.h" #include "clang/Basic/OpenACCKinds.h" #include "clang/Sema/Sema.h" -#include "clang/Sema/SemaOpenACC.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Casting.h" @@ -80,6 +81,9 @@ bool PreserveLoopRAIIDepthInAssociatedStmtRAII(OpenACCDirectiveKind DK) { case OpenACCDirectiveKind::HostData: case OpenACCDirectiveKind::Atomic: return true; + case OpenACCDirectiveKind::Cache: + case OpenACCDirectiveKind::Routine: + case OpenACCDirectiveKind::Declare: case OpenACCDirectiveKind::EnterData: case OpenACCDirectiveKind::ExitData: case OpenACCDirectiveKind::Wait: @@ -88,7 +92,6 @@ bool PreserveLoopRAIIDepthInAssociatedStmtRAII(OpenACCDirectiveKind DK) { case OpenACCDirectiveKind::Set: case OpenACCDirectiveKind::Update: llvm_unreachable("Doesn't have an associated stmt"); - default: case OpenACCDirectiveKind::Invalid: llvm_unreachable("Unhandled directive kind?"); } @@ -311,40 +314,8 @@ void SemaOpenACC::ActOnConstruct(OpenACCDirectiveKind K, SemaRef.PushExpressionEvaluationContext( Sema::ExpressionEvaluationContext::PotentiallyEvaluated); - switch (K) { - case OpenACCDirectiveKind::Invalid: - // Nothing to do here, an invalid kind has nothing we can check here. 
We - // want to continue parsing clauses as far as we can, so we will just - // ensure that we can still work and don't check any construct-specific - // rules anywhere. - break; - case OpenACCDirectiveKind::Parallel: - case OpenACCDirectiveKind::Serial: - case OpenACCDirectiveKind::Kernels: - case OpenACCDirectiveKind::ParallelLoop: - case OpenACCDirectiveKind::SerialLoop: - case OpenACCDirectiveKind::KernelsLoop: - case OpenACCDirectiveKind::Loop: - case OpenACCDirectiveKind::Data: - case OpenACCDirectiveKind::EnterData: - case OpenACCDirectiveKind::ExitData: - case OpenACCDirectiveKind::HostData: - case OpenACCDirectiveKind::Init: - case OpenACCDirectiveKind::Shutdown: - case OpenACCDirectiveKind::Set: - case OpenACCDirectiveKind::Update: - case OpenACCDirectiveKind::Atomic: - // Nothing to do here, there is no real legalization that needs to happen - // here as these constructs do not take any arguments. - break; - case OpenACCDirectiveKind::Wait: - // Nothing really to do here, the arguments to the 'wait' should have - // already been handled by the time we get here. - break; - default: - Diag(DirLoc, diag::warn_acc_construct_unimplemented) << K; - break; - } + // There is nothing do do here as all we have at this point is the name of the + // construct itself. } ExprResult SemaOpenACC::ActOnIntExpr(OpenACCDirectiveKind DK, @@ -478,15 +449,68 @@ bool SemaOpenACC::CheckVarIsPointerType(OpenACCClauseKind ClauseKind, return false; } -ExprResult SemaOpenACC::ActOnVar(OpenACCClauseKind CK, Expr *VarExpr) { +ExprResult SemaOpenACC::ActOnCacheVar(Expr *VarExpr) { + Expr *CurVarExpr = VarExpr->IgnoreParenImpCasts(); + if (!isa(CurVarExpr)) { + Diag(VarExpr->getExprLoc(), diag::err_acc_not_a_var_ref_cache); + return ExprError(); + } + + // It isn't clear what 'simple array element or simple subarray' means, so we + // will just allow arbitrary depth. 
+ while (isa(CurVarExpr)) { + if (auto *SubScrpt = dyn_cast(CurVarExpr)) + CurVarExpr = SubScrpt->getBase()->IgnoreParenImpCasts(); + else + CurVarExpr = + cast(CurVarExpr)->getBase()->IgnoreParenImpCasts(); + } + + // References to a VarDecl are fine. + if (const auto *DRE = dyn_cast(CurVarExpr)) { + if (isa( + DRE->getFoundDecl()->getCanonicalDecl())) + return VarExpr; + } + + if (const auto *ME = dyn_cast(CurVarExpr)) { + if (isa(ME->getMemberDecl()->getCanonicalDecl())) { + return VarExpr; + } + } + + // Nothing really we can do here, as these are dependent. So just return they + // are valid. + if (isa(CurVarExpr)) + return VarExpr; + + // There isn't really anything we can do in the case of a recovery expr, so + // skip the diagnostic rather than produce a confusing diagnostic. + if (isa(CurVarExpr)) + return ExprError(); + + Diag(VarExpr->getExprLoc(), diag::err_acc_not_a_var_ref_cache); + return ExprError(); +} +ExprResult SemaOpenACC::ActOnVar(OpenACCDirectiveKind DK, OpenACCClauseKind CK, + Expr *VarExpr) { + // This has unique enough restrictions that we should split it to a separate + // function. + if (DK == OpenACCDirectiveKind::Cache) + return ActOnCacheVar(VarExpr); + Expr *CurVarExpr = VarExpr->IgnoreParenImpCasts(); // 'use_device' doesn't allow array subscript or array sections. // OpenACC3.3 2.8: // A 'var' in a 'use_device' clause must be the name of a variable or array. - if (CK == OpenACCClauseKind::UseDevice && + // OpenACC3.3 2.13: + // A 'var' in a 'declare' directive must be a variable or array name. 
+ if ((CK == OpenACCClauseKind::UseDevice || + DK == OpenACCDirectiveKind::Declare) && isa(CurVarExpr)) { - Diag(VarExpr->getExprLoc(), diag::err_acc_not_a_var_ref_use_device); + Diag(VarExpr->getExprLoc(), diag::err_acc_not_a_var_ref_use_device_declare) + << (DK == OpenACCDirectiveKind::Declare); return ExprError(); } @@ -510,20 +534,30 @@ ExprResult SemaOpenACC::ActOnVar(OpenACCClauseKind CK, Expr *VarExpr) { // If CK is a Reduction, this special cases for OpenACC3.3 2.5.15: "A var in a // reduction clause must be a scalar variable name, an aggregate variable // name, an array element, or a subarray. - // If CK is a 'use_device', this also isn't valid, as it isn' the name of a - // variable or array. + // If CK is a 'use_device', this also isn't valid, as it isn't the name of a + // variable or array, if not done as a member expr. // A MemberExpr that references a Field is valid for other clauses. - if (CK != OpenACCClauseKind::Reduction && - CK != OpenACCClauseKind::UseDevice) { - if (const auto *ME = dyn_cast(CurVarExpr)) { - if (isa(ME->getMemberDecl()->getCanonicalDecl())) + if (const auto *ME = dyn_cast(CurVarExpr)) { + if (isa(ME->getMemberDecl()->getCanonicalDecl())) { + if (DK == OpenACCDirectiveKind::Declare || + CK == OpenACCClauseKind::Reduction || + CK == OpenACCClauseKind::UseDevice) { + + // We can allow 'member expr' if the 'this' is implicit in the case of + // declare, reduction, and use_device. 
+ const auto *This = dyn_cast(ME->getBase()); + if (This && This->isImplicit()) + return VarExpr; + } else { return VarExpr; + } } } - // Referring to 'this' is ok for the most part, but for 'use_device' doesn't - // fall into 'variable or array name' - if (CK != OpenACCClauseKind::UseDevice && isa(CurVarExpr)) + // Referring to 'this' is ok for the most part, but for 'use_device'/'declare' + // doesn't fall into 'variable or array name' + if (CK != OpenACCClauseKind::UseDevice && + DK != OpenACCDirectiveKind::Declare && isa(CurVarExpr)) return VarExpr; // Nothing really we can do here, as these are dependent. So just return they @@ -538,8 +572,12 @@ ExprResult SemaOpenACC::ActOnVar(OpenACCClauseKind CK, Expr *VarExpr) { if (isa(CurVarExpr)) return ExprError(); - if (CK == OpenACCClauseKind::UseDevice) - Diag(VarExpr->getExprLoc(), diag::err_acc_not_a_var_ref_use_device); + if (DK == OpenACCDirectiveKind::Declare) + Diag(VarExpr->getExprLoc(), diag::err_acc_not_a_var_ref_use_device_declare) + << /*declare*/ 1; + else if (CK == OpenACCClauseKind::UseDevice) + Diag(VarExpr->getExprLoc(), diag::err_acc_not_a_var_ref_use_device_declare) + << /*use_device*/ 0; else Diag(VarExpr->getExprLoc(), diag::err_acc_not_a_var_ref) << (CK != OpenACCClauseKind::Reduction); @@ -1408,11 +1446,117 @@ std::string GetListOfClauses(llvm::ArrayRef Clauses) { OS << " or \'" << Clauses.back() << '\''; return Output; } + +// Helper that should mirror ActOnRoutineName to get the FunctionDecl out for +// magic-static checking. +FunctionDecl *getFunctionFromRoutineName(Expr *RoutineName) { + if (!RoutineName) + return nullptr; + RoutineName = RoutineName->IgnoreParenImpCasts(); + if (isa(RoutineName)) { + return nullptr; + } else if (isa( + RoutineName)) { + return nullptr; + } else if (auto *DRE = dyn_cast(RoutineName)) { + ValueDecl *VD = DRE->getDecl(); + + if (auto *FD = dyn_cast(VD)) + return FD; + + // Allow lambdas. 
+ if (auto *VarD = dyn_cast(VD)) { + if (auto *RD = VarD->getType()->getAsCXXRecordDecl()) { + if (RD->isGenericLambda()) + return nullptr; + if (RD->isLambda()) + return RD->getLambdaCallOperator(); + } + } + return nullptr; + } else if (isa(RoutineName)) { + return nullptr; + } + return nullptr; +} + } // namespace +ExprResult SemaOpenACC::ActOnRoutineName(Expr *RoutineName) { + assert(RoutineName && "Routine name cannot be null here"); + RoutineName = RoutineName->IgnoreParenImpCasts(); + + if (isa(RoutineName)) { + // This has already been diagnosed, so we can skip it. + return ExprError(); + } else if (isa( + RoutineName)) { + // These are dependent and we can't really check them, so delay until + // instantiation. + return RoutineName; + } else if (const auto *DRE = dyn_cast(RoutineName)) { + const ValueDecl *VD = DRE->getDecl(); + + if (isa(VD)) + return RoutineName; + + // Allow lambdas. + if (const auto *VarD = dyn_cast(VD)) { + if (const auto *RD = VarD->getType()->getAsCXXRecordDecl()) { + if (RD->isGenericLambda()) { + Diag(RoutineName->getBeginLoc(), diag::err_acc_routine_overload_set) + << RoutineName; + return ExprError(); + } + if (RD->isLambda()) + return RoutineName; + } else if (VarD->getType()->isDependentType()) { + // If this is a dependent variable, it might be a lambda. So we just + // accept this and catch it next time. + return RoutineName; + } + } + + Diag(RoutineName->getBeginLoc(), diag::err_acc_routine_not_func) + << RoutineName; + return ExprError(); + } else if (isa(RoutineName)) { + // This happens in function templates, even when the template arguments are + // fully specified. We could possibly do some sort of matching to make sure + // that this is looked up/deduced, but GCC does not do this, so there + // doesn't seem to be a good reason for us to do it either. 
+ Diag(RoutineName->getBeginLoc(), diag::err_acc_routine_overload_set) + << RoutineName; + return ExprError(); + } + + Diag(RoutineName->getBeginLoc(), diag::err_acc_routine_not_func) + << RoutineName; + return ExprError(); +} +void SemaOpenACC::ActOnVariableDeclarator(VarDecl *VD) { + if (!VD->isStaticLocal()) + return; + + if (const auto *FD = dyn_cast(getCurContext())) { + if (const auto *A = FD->getAttr()) { + Diag(VD->getBeginLoc(), diag::err_acc_magic_static_in_routine); + Diag(A->getLocation(), diag::note_acc_construct_here) + << OpenACCDirectiveKind::Routine; + } + MagicStaticLocs.insert({FD, VD->getBeginLoc()}); + } +} + bool SemaOpenACC::ActOnStartStmtDirective( OpenACCDirectiveKind K, SourceLocation StartLoc, ArrayRef Clauses) { + + // Declaration directives an appear in a statement location, so call into that + // function here. + if (K == OpenACCDirectiveKind::Declare || K == OpenACCDirectiveKind::Routine) + return ActOnStartDeclDirective(K, StartLoc, Clauses); + SemaRef.DiscardCleanupsInEvaluationContext(); SemaRef.PopExpressionEvaluationContext(); @@ -1529,8 +1673,6 @@ StmtResult SemaOpenACC::ActOnEndStmtDirective( SourceLocation EndLoc, ArrayRef Clauses, StmtResult AssocStmt) { switch (K) { - default: - return StmtEmpty(); case OpenACCDirectiveKind::Invalid: return StmtError(); case OpenACCDirectiveKind::Parallel: @@ -1597,6 +1739,22 @@ StmtResult SemaOpenACC::ActOnEndStmtDirective( getASTContext(), StartLoc, DirLoc, AtomicKind, EndLoc, AssocStmt.isUsable() ? AssocStmt.get() : nullptr); } + case OpenACCDirectiveKind::Cache: { + assert(Clauses.empty() && "Cache doesn't allow clauses"); + return OpenACCCacheConstruct::Create(getASTContext(), StartLoc, DirLoc, + LParenLoc, MiscLoc, Exprs, RParenLoc, + EndLoc); + } + case OpenACCDirectiveKind::Routine: + case OpenACCDirectiveKind::Declare: { + // Declare and routine arei declaration directives, but can be used here as + // long as we wrap it in a DeclStmt. So make sure we do that here. 
+ DeclGroupRef DR = ActOnEndDeclDirective( + K, StartLoc, DirLoc, LParenLoc, + (Exprs.empty() ? nullptr : Exprs.front()), RParenLoc, EndLoc, Clauses); + + return SemaRef.ActOnDeclStmt(DeclGroupPtrTy::make(DR), StartLoc, EndLoc); + } } llvm_unreachable("Unhandled case in directive handling?"); } @@ -1614,6 +1772,7 @@ StmtResult SemaOpenACC::ActOnAssociatedStmt( case OpenACCDirectiveKind::Init: case OpenACCDirectiveKind::Shutdown: case OpenACCDirectiveKind::Set: + case OpenACCDirectiveKind::Cache: llvm_unreachable( "these don't have associated statements, so shouldn't get here"); case OpenACCDirectiveKind::Atomic: @@ -1673,17 +1832,86 @@ StmtResult SemaOpenACC::ActOnAssociatedStmt( llvm_unreachable("Invalid associated statement application"); } -bool SemaOpenACC::ActOnStartDeclDirective(OpenACCDirectiveKind K, - SourceLocation StartLoc) { +bool SemaOpenACC::ActOnStartDeclDirective( + OpenACCDirectiveKind K, SourceLocation StartLoc, + ArrayRef Clauses) { // OpenCC3.3 2.1 (line 889) // A program must not depend on the order of evaluation of expressions in // clause arguments or on any side effects of the evaluations. 
SemaRef.DiscardCleanupsInEvaluationContext(); SemaRef.PopExpressionEvaluationContext(); + + if (K == OpenACCDirectiveKind::Routine && + llvm::find_if(Clauses, + llvm::IsaPred) == + Clauses.end()) + return Diag(StartLoc, diag::err_acc_construct_one_clause_of) + << K + << GetListOfClauses({ + OpenACCClauseKind::Gang, + OpenACCClauseKind::Worker, + OpenACCClauseKind::Vector, + OpenACCClauseKind::Seq, + }); + return diagnoseConstructAppertainment(*this, K, StartLoc, /*IsStmt=*/false); } -DeclGroupRef SemaOpenACC::ActOnEndDeclDirective() { return DeclGroupRef{}; } +DeclGroupRef SemaOpenACC::ActOnEndDeclDirective( + OpenACCDirectiveKind K, SourceLocation StartLoc, SourceLocation DirLoc, + SourceLocation LParenLoc, Expr *FuncRef, SourceLocation RParenLoc, + SourceLocation EndLoc, ArrayRef Clauses) { + switch (K) { + default: + case OpenACCDirectiveKind::Invalid: + return DeclGroupRef{}; + case OpenACCDirectiveKind::Declare: { + // OpenACC3.3 2.13: At least one clause must appear on a declare directive. + if (Clauses.empty()) { + Diag(EndLoc, diag::err_acc_declare_required_clauses); + // No reason to add this to the AST, as we would just end up trying to + // instantiate this, which would double-diagnose here, which we wouldn't + // want to do. + return DeclGroupRef{}; + } + + auto *DeclareDecl = OpenACCDeclareDecl::Create( + getASTContext(), getCurContext(), StartLoc, DirLoc, EndLoc, Clauses); + DeclareDecl->setAccess(AS_public); + getCurContext()->addDecl(DeclareDecl); + return DeclGroupRef{DeclareDecl}; + } + case OpenACCDirectiveKind::Routine: { + // For now, diagnose that we don't support argument-less routine yet. 
+ if (LParenLoc.isInvalid()) { + Diag(DirLoc, diag::warn_acc_routine_unimplemented); + return DeclGroupRef{}; + } + + auto *RoutineDecl = OpenACCRoutineDecl::Create( + getASTContext(), getCurContext(), StartLoc, DirLoc, LParenLoc, FuncRef, + RParenLoc, EndLoc, Clauses); + RoutineDecl->setAccess(AS_public); + getCurContext()->addDecl(RoutineDecl); + + // OpenACC 3.3 2.15: + // In C and C++, function static variables are not supported in functions to + // which a routine directive applies. + if (auto *FD = getFunctionFromRoutineName(FuncRef)) { + if (auto Itr = MagicStaticLocs.find(FD); Itr != MagicStaticLocs.end()) { + Diag(Itr->second, diag::err_acc_magic_static_in_routine); + Diag(DirLoc, diag::note_acc_construct_here) + << OpenACCDirectiveKind::Routine; + } + FD->addAttr(OpenACCRoutineAnnotAttr::Create(getASTContext(), DirLoc)); + } + + return DeclGroupRef{RoutineDecl}; + } + } + llvm_unreachable("unhandled case in directive handling?"); +} ExprResult SemaOpenACC::BuildOpenACCAsteriskSizeExpr(SourceLocation AsteriskLoc) { diff --git a/clang/lib/Sema/SemaOpenACCClause.cpp b/clang/lib/Sema/SemaOpenACCClause.cpp index 1e74f126c31ce..582681f247b31 100644 --- a/clang/lib/Sema/SemaOpenACCClause.cpp +++ b/clang/lib/Sema/SemaOpenACCClause.cpp @@ -10,8 +10,9 @@ /// //===----------------------------------------------------------------------===// -#include "clang/AST/OpenACCClause.h" #include "clang/AST/DeclCXX.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/OpenACCClause.h" #include "clang/Basic/DiagnosticSema.h" #include "clang/Basic/OpenACCKinds.h" #include "clang/Sema/SemaOpenACC.h" @@ -192,6 +193,7 @@ bool doesClauseApplyToDirective(OpenACCDirectiveKind DirectiveKind, case OpenACCDirectiveKind::Kernels: case OpenACCDirectiveKind::Data: case OpenACCDirectiveKind::EnterData: + case OpenACCDirectiveKind::Declare: case OpenACCDirectiveKind::ParallelLoop: case OpenACCDirectiveKind::SerialLoop: case OpenACCDirectiveKind::KernelsLoop: @@ -424,6 +426,22 @@ bool 
doesClauseApplyToDirective(OpenACCDirectiveKind DirectiveKind, return false; } } + case OpenACCClauseKind::Link: { + switch (DirectiveKind) { + case OpenACCDirectiveKind::Declare: + return true; + default: + return false; + } + } + case OpenACCClauseKind::DeviceResident: { + switch (DirectiveKind) { + case OpenACCDirectiveKind::Declare: + return true; + default: + return false; + } + } case OpenACCClauseKind::UseDevice: { switch (DirectiveKind) { @@ -457,6 +475,14 @@ bool doesClauseApplyToDirective(OpenACCDirectiveKind DirectiveKind, return false; } } + case OpenACCClauseKind::NoHost: { + switch (DirectiveKind) { + case OpenACCDirectiveKind::Routine: + return true; + default: + return false; + } + } } default: @@ -588,8 +614,17 @@ bool checkValidAfterDeviceType( // with the one being currently implemented/only updated after the entire // construct has been implemented. bool isDirectiveKindImplemented(OpenACCDirectiveKind DK) { - return DK != OpenACCDirectiveKind::Declare && - DK != OpenACCDirectiveKind::Routine; + return DK != OpenACCDirectiveKind::Routine; +} + +// GCC looks through linkage specs, but not the other transparent declaration +// contexts for 'declare' restrictions, so this helper function helps get us +// through that. +const DeclContext *removeLinkageSpecDC(const DeclContext *DC) { + while (isa(DC)) + DC = DC->getParent(); + + return DC; } class SemaOpenACCClauseVisitor { @@ -606,9 +641,18 @@ class SemaOpenACCClauseVisitor { // OpenACC 3.3 2.9: // A 'gang', 'worker', or 'vector' clause may not appear if a 'seq' clause // appears. - bool DiagIfSeqClause(SemaOpenACC::OpenACCParsedClause &Clause) { + // -also- + // OpenACC3.3 2.15: (routine) + // Exactly one of the 'gang', 'worker', 'vector' or 'seq' clauses must appear. + bool + DiagGangWorkerVectorSeqConflict(SemaOpenACC::OpenACCParsedClause &Clause) { const auto *Itr = - llvm::find_if(ExistingClauses, llvm::IsaPred); + Clause.getDirectiveKind() == OpenACCDirectiveKind::Routine + ? 
llvm::find_if( + ExistingClauses, + llvm::IsaPred) + : llvm::find_if(ExistingClauses, llvm::IsaPred); if (Itr != ExistingClauses.end()) { SemaRef.Diag(Clause.getBeginLoc(), diag::err_acc_clause_cannot_combine) @@ -1006,16 +1050,15 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitNoCreateClause( OpenACCClause *SemaOpenACCClauseVisitor::VisitPresentClause( SemaOpenACC::OpenACCParsedClause &Clause) { - // Restrictions only properly implemented on 'compute'/'combined'/'data' - // constructs, and 'compute'/'combined'/'data' constructs are the only - // construct that can do anything with this yet, so skip/treat as - // unimplemented in this case. - if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) - return isNotImplemented(); // ActOnVar ensured that everything is a valid variable reference, so there // really isn't anything to do here. GCC does some duplicate-finding, though // it isn't apparent in the standard where this is justified. + // 'declare' has some restrictions that need to be enforced separately, so + // check it here. + if (SemaRef.CheckDeclareClause(Clause)) + return nullptr; + return OpenACCPresentClause::Create(Ctx, Clause.getBeginLoc(), Clause.getLParenLoc(), Clause.getVarList(), Clause.getEndLoc()); @@ -1045,33 +1088,58 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitDeviceClause( OpenACCClause *SemaOpenACCClauseVisitor::VisitCopyClause( SemaOpenACC::OpenACCParsedClause &Clause) { - // Restrictions only properly implemented on 'compute'/'combined'/'data' - // constructs, and 'compute'/'combined'/'data' constructs are the only - // construct that can do anything with this yet, so skip/treat as - // unimplemented in this case. - if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) - return isNotImplemented(); // ActOnVar ensured that everything is a valid variable reference, so there // really isn't anything to do here. GCC does some duplicate-finding, though // it isn't apparent in the standard where this is justified. 
+ // 'declare' has some restrictions that need to be enforced separately, so + // check it here. + if (SemaRef.CheckDeclareClause(Clause)) + return nullptr; + return OpenACCCopyClause::Create( Ctx, Clause.getClauseKind(), Clause.getBeginLoc(), Clause.getLParenLoc(), Clause.getVarList(), Clause.getEndLoc()); } +OpenACCClause *SemaOpenACCClauseVisitor::VisitLinkClause( + SemaOpenACC::OpenACCParsedClause &Clause) { + // 'declare' has some restrictions that need to be enforced separately, so + // check it here. + if (SemaRef.CheckDeclareClause(Clause)) + return nullptr; + + Clause.setVarListDetails(SemaRef.CheckLinkClauseVarList(Clause.getVarList()), + /*IsReadOnly=*/false, /*IsZero=*/false); + + return OpenACCLinkClause::Create(Ctx, Clause.getBeginLoc(), + Clause.getLParenLoc(), Clause.getVarList(), + Clause.getEndLoc()); +} + +OpenACCClause *SemaOpenACCClauseVisitor::VisitDeviceResidentClause( + SemaOpenACC::OpenACCParsedClause &Clause) { + // 'declare' has some restrictions that need to be enforced separately, so + // check it here. + if (SemaRef.CheckDeclareClause(Clause)) + return nullptr; + + return OpenACCDeviceResidentClause::Create( + Ctx, Clause.getBeginLoc(), Clause.getLParenLoc(), Clause.getVarList(), + Clause.getEndLoc()); +} + OpenACCClause *SemaOpenACCClauseVisitor::VisitCopyInClause( SemaOpenACC::OpenACCParsedClause &Clause) { - // Restrictions only properly implemented on 'compute'/'combined'/'data' - // constructs, and 'compute'/'combined'/'data' constructs are the only - // construct that can do anything with this yet, so skip/treat as - // unimplemented in this case. - if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) - return isNotImplemented(); // ActOnVar ensured that everything is a valid variable reference, so there // really isn't anything to do here. GCC does some duplicate-finding, though // it isn't apparent in the standard where this is justified. 
+ // 'declare' has some restrictions that need to be enforced separately, so + // check it here. + if (SemaRef.CheckDeclareClause(Clause)) + return nullptr; + return OpenACCCopyInClause::Create( Ctx, Clause.getClauseKind(), Clause.getBeginLoc(), Clause.getLParenLoc(), Clause.isReadOnly(), Clause.getVarList(), Clause.getEndLoc()); @@ -1079,16 +1147,15 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitCopyInClause( OpenACCClause *SemaOpenACCClauseVisitor::VisitCopyOutClause( SemaOpenACC::OpenACCParsedClause &Clause) { - // Restrictions only properly implemented on 'compute'/'combined'/'data' - // constructs, and 'compute'/'combined'/'data' constructs are the only - // construct that can do anything with this yet, so skip/treat as - // unimplemented in this case. - if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) - return isNotImplemented(); // ActOnVar ensured that everything is a valid variable reference, so there // really isn't anything to do here. GCC does some duplicate-finding, though // it isn't apparent in the standard where this is justified. + // 'declare' has some restrictions that need to be enforced separately, so + // check it here. + if (SemaRef.CheckDeclareClause(Clause)) + return nullptr; + return OpenACCCopyOutClause::Create( Ctx, Clause.getClauseKind(), Clause.getBeginLoc(), Clause.getLParenLoc(), Clause.isZero(), Clause.getVarList(), Clause.getEndLoc()); @@ -1100,6 +1167,11 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitCreateClause( // really isn't anything to do here. GCC does some duplicate-finding, though // it isn't apparent in the standard where this is justified. + // 'declare' has some restrictions that need to be enforced separately, so + // check it here. 
+ if (SemaRef.CheckDeclareClause(Clause)) + return nullptr; + return OpenACCCreateClause::Create( Ctx, Clause.getClauseKind(), Clause.getBeginLoc(), Clause.getLParenLoc(), Clause.isZero(), Clause.getVarList(), Clause.getEndLoc()); @@ -1156,13 +1228,6 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitUseDeviceClause( OpenACCClause *SemaOpenACCClauseVisitor::VisitDevicePtrClause( SemaOpenACC::OpenACCParsedClause &Clause) { - // Restrictions only properly implemented on 'compute'/'combined'/'data' - // constructs, and 'compute'/'combined'/'data' constructs are the only - // construct that can do anything with this yet, so skip/treat as - // unimplemented in this case. - if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) - return isNotImplemented(); - // ActOnVar ensured that everything is a valid variable reference, but we // still have to make sure it is a pointer type. llvm::SmallVector VarList{Clause.getVarList()}; @@ -1172,6 +1237,11 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitDevicePtrClause( Clause.setVarListDetails(VarList, /*IsReadOnly=*/false, /*IsZero=*/false); + // 'declare' has some restrictions that need to be enforced separately, so + // check it here. + if (SemaRef.CheckDeclareClause(Clause)) + return nullptr; + return OpenACCDevicePtrClause::Create( Ctx, Clause.getBeginLoc(), Clause.getLParenLoc(), Clause.getVarList(), Clause.getEndLoc()); @@ -1211,7 +1281,8 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitAutoClause( // Only one of the seq, independent, and auto clauses may appear. 
const auto *Itr = llvm::find_if(ExistingClauses, - llvm::IsaPred); + llvm::IsaPred); if (Itr != ExistingClauses.end()) { SemaRef.Diag(Clause.getBeginLoc(), diag::err_acc_loop_spec_conflict) << Clause.getClauseKind() << Clause.getDirectiveKind(); @@ -1223,12 +1294,19 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitAutoClause( Clause.getEndLoc()); } +OpenACCClause *SemaOpenACCClauseVisitor::VisitNoHostClause( + SemaOpenACC::OpenACCParsedClause &Clause) { + return OpenACCNoHostClause::Create(Ctx, Clause.getBeginLoc(), + Clause.getEndLoc()); +} + OpenACCClause *SemaOpenACCClauseVisitor::VisitIndependentClause( SemaOpenACC::OpenACCParsedClause &Clause) { // OpenACC 3.3 2.9: // Only one of the seq, independent, and auto clauses may appear. const auto *Itr = llvm::find_if( - ExistingClauses, llvm::IsaPred); + ExistingClauses, llvm::IsaPred); if (Itr != ExistingClauses.end()) { SemaRef.Diag(Clause.getBeginLoc(), diag::err_acc_loop_spec_conflict) << Clause.getClauseKind() << Clause.getDirectiveKind(); @@ -1274,6 +1352,38 @@ ExprResult DiagIntArgInvalid(SemaOpenACC &S, Expr *E, StringRef TagKind, return ExprError(); } +ExprResult CheckGangDimExpr(SemaOpenACC &S, Expr *E) { + // OpenACC 3.3 2.9.2: When the parent compute construct is a parallel + // construct, or an orphaned loop construct, the gang clause behaves as + // follows. ... The dim argument must be a constant positive integer value + // 1, 2, or 3. + // -also- + // OpenACC 3.3 2.15: The 'dim' argument must be a constant positive integer + // with value 1, 2, or 3. 
+ if (!E) + return ExprError(); + ExprResult Res = S.ActOnIntExpr(OpenACCDirectiveKind::Invalid, + OpenACCClauseKind::Gang, E->getBeginLoc(), E); + + if (!Res.isUsable()) + return Res; + + if (Res.get()->isInstantiationDependent()) + return Res; + + std::optional ICE = + Res.get()->getIntegerConstantExpr(S.getASTContext()); + + if (!ICE || *ICE <= 0 || ICE > 3) { + S.Diag(Res.get()->getBeginLoc(), diag::err_acc_gang_dim_value) + << ICE.has_value() << ICE.value_or(llvm::APSInt{}).getExtValue(); + return ExprError(); + } + + return ExprResult{ + ConstantExpr::Create(S.getASTContext(), Res.get(), APValue{*ICE})}; +} + ExprResult CheckGangParallelExpr(SemaOpenACC &S, OpenACCDirectiveKind DK, OpenACCDirectiveKind AssocKind, OpenACCGangKind GK, Expr *E) { @@ -1285,35 +1395,8 @@ ExprResult CheckGangParallelExpr(SemaOpenACC &S, OpenACCDirectiveKind DK, // construct, or an orphaned loop construct, the gang clause behaves as // follows. ... The num argument is not allowed. return DiagIntArgInvalid(S, E, GK, OpenACCClauseKind::Gang, DK, AssocKind); - case OpenACCGangKind::Dim: { - // OpenACC 3.3 2.9.2: When the parent compute construct is a parallel - // construct, or an orphaned loop construct, the gang clause behaves as - // follows. ... The dim argument must be a constant positive integer value - // 1, 2, or 3. 
- if (!E) - return ExprError(); - ExprResult Res = - S.ActOnIntExpr(OpenACCDirectiveKind::Invalid, OpenACCClauseKind::Gang, - E->getBeginLoc(), E); - - if (!Res.isUsable()) - return Res; - - if (Res.get()->isInstantiationDependent()) - return Res; - - std::optional ICE = - Res.get()->getIntegerConstantExpr(S.getASTContext()); - - if (!ICE || *ICE <= 0 || ICE > 3) { - S.Diag(Res.get()->getBeginLoc(), diag::err_acc_gang_dim_value) - << ICE.has_value() << ICE.value_or(llvm::APSInt{}).getExtValue(); - return ExprError(); - } - - return ExprResult{ - ConstantExpr::Create(S.getASTContext(), Res.get(), APValue{*ICE})}; - } + case OpenACCGangKind::Dim: + return CheckGangDimExpr(S, E); } llvm_unreachable("Unknown gang kind in gang parallel check"); } @@ -1378,21 +1461,32 @@ ExprResult CheckGangSerialExpr(SemaOpenACC &S, OpenACCDirectiveKind DK, llvm_unreachable("Unknown gang kind in gang serial check"); } +ExprResult CheckGangRoutineExpr(SemaOpenACC &S, OpenACCDirectiveKind DK, + OpenACCDirectiveKind AssocKind, + OpenACCGangKind GK, Expr *E) { + switch (GK) { + // Only 'dim' is allowed on a routine, so diallow num and static. + case OpenACCGangKind::Num: + case OpenACCGangKind::Static: + return DiagIntArgInvalid(S, E, GK, OpenACCClauseKind::Gang, DK, AssocKind); + case OpenACCGangKind::Dim: + return CheckGangDimExpr(S, E); + } + llvm_unreachable("Unknown gang kind in gang serial check"); +} + OpenACCClause *SemaOpenACCClauseVisitor::VisitVectorClause( SemaOpenACC::OpenACCParsedClause &Clause) { - if (DiagIfSeqClause(Clause)) + if (DiagGangWorkerVectorSeqConflict(Clause)) return nullptr; - // Restrictions only properly implemented on 'loop'/'combined' constructs, and - // it is the only construct that can do anything with this, so skip/treat as - // unimplemented for the routine constructs. - if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) - return isNotImplemented(); - Expr *IntExpr = Clause.getNumIntExprs() != 0 ? 
Clause.getIntExprs()[0] : nullptr; if (IntExpr) { - if (!isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind())) { + switch (Clause.getDirectiveKind()) { + default: + llvm_unreachable("Invalid directive kind for this clause"); + case OpenACCDirectiveKind::Loop: switch (SemaRef.getActiveComputeConstructInfo().Kind) { case OpenACCDirectiveKind::Invalid: case OpenACCDirectiveKind::Parallel: @@ -1428,34 +1522,38 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitVectorClause( default: llvm_unreachable("Non compute construct in active compute construct"); } - } else { - if (Clause.getDirectiveKind() == OpenACCDirectiveKind::SerialLoop) { - DiagIntArgInvalid(SemaRef, IntExpr, "length", OpenACCClauseKind::Vector, - Clause.getDirectiveKind(), - SemaRef.getActiveComputeConstructInfo().Kind); - IntExpr = nullptr; - } else if (Clause.getDirectiveKind() == - OpenACCDirectiveKind::KernelsLoop) { - const auto *Itr = llvm::find_if( - ExistingClauses, llvm::IsaPred); - if (Itr != ExistingClauses.end()) { - SemaRef.Diag(IntExpr->getBeginLoc(), diag::err_acc_num_arg_conflict) - << "length" << OpenACCClauseKind::Vector - << Clause.getDirectiveKind() - << HasAssocKind(Clause.getDirectiveKind(), - SemaRef.getActiveComputeConstructInfo().Kind) - << SemaRef.getActiveComputeConstructInfo().Kind - << OpenACCClauseKind::VectorLength; - SemaRef.Diag((*Itr)->getBeginLoc(), - diag::note_acc_previous_clause_here); + break; + case OpenACCDirectiveKind::KernelsLoop: { + const auto *Itr = llvm::find_if(ExistingClauses, + llvm::IsaPred); + if (Itr != ExistingClauses.end()) { + SemaRef.Diag(IntExpr->getBeginLoc(), diag::err_acc_num_arg_conflict) + << "length" << OpenACCClauseKind::Vector + << Clause.getDirectiveKind() + << HasAssocKind(Clause.getDirectiveKind(), + SemaRef.getActiveComputeConstructInfo().Kind) + << SemaRef.getActiveComputeConstructInfo().Kind + << OpenACCClauseKind::VectorLength; + SemaRef.Diag((*Itr)->getBeginLoc(), + diag::note_acc_previous_clause_here); - IntExpr = nullptr; - } + 
IntExpr = nullptr; } + break; + } + case OpenACCDirectiveKind::SerialLoop: + case OpenACCDirectiveKind::Routine: + DiagIntArgInvalid(SemaRef, IntExpr, "length", OpenACCClauseKind::Vector, + Clause.getDirectiveKind(), + SemaRef.getActiveComputeConstructInfo().Kind); + IntExpr = nullptr; + break; + case OpenACCDirectiveKind::ParallelLoop: + break; } } - if (!isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind())) { + if (Clause.getDirectiveKind() == OpenACCDirectiveKind::Loop) { // OpenACC 3.3 2.9.4: The region of a loop with a 'vector' clause may not // contain a loop with a gang, worker, or vector clause unless within a // nested compute region. @@ -1478,20 +1576,17 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitVectorClause( OpenACCClause *SemaOpenACCClauseVisitor::VisitWorkerClause( SemaOpenACC::OpenACCParsedClause &Clause) { - if (DiagIfSeqClause(Clause)) + if (DiagGangWorkerVectorSeqConflict(Clause)) return nullptr; - // Restrictions only properly implemented on 'loop'/'combined' constructs, and - // it is the only construct that can do anything with this, so skip/treat as - // unimplemented for the routine constructs. - if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) - return isNotImplemented(); - Expr *IntExpr = Clause.getNumIntExprs() != 0 ? 
Clause.getIntExprs()[0] : nullptr; if (IntExpr) { - if (!isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind())) { + switch (Clause.getDirectiveKind()) { + default: + llvm_unreachable("Invalid directive kind for this clause"); + case OpenACCDirectiveKind::Loop: switch (SemaRef.getActiveComputeConstructInfo().Kind) { case OpenACCDirectiveKind::Invalid: case OpenACCDirectiveKind::ParallelLoop: @@ -1525,35 +1620,35 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitWorkerClause( default: llvm_unreachable("Non compute construct in active compute construct"); } - } else { - if (Clause.getDirectiveKind() == OpenACCDirectiveKind::ParallelLoop || - Clause.getDirectiveKind() == OpenACCDirectiveKind::SerialLoop) { - DiagIntArgInvalid(SemaRef, IntExpr, OpenACCGangKind::Num, - OpenACCClauseKind::Worker, Clause.getDirectiveKind(), - SemaRef.getActiveComputeConstructInfo().Kind); - IntExpr = nullptr; - } else { - assert(Clause.getDirectiveKind() == OpenACCDirectiveKind::KernelsLoop && - "Unknown combined directive kind?"); - const auto *Itr = llvm::find_if(ExistingClauses, - llvm::IsaPred); - if (Itr != ExistingClauses.end()) { - SemaRef.Diag(IntExpr->getBeginLoc(), diag::err_acc_num_arg_conflict) - << "num" << OpenACCClauseKind::Worker << Clause.getDirectiveKind() - << HasAssocKind(Clause.getDirectiveKind(), - SemaRef.getActiveComputeConstructInfo().Kind) - << SemaRef.getActiveComputeConstructInfo().Kind - << OpenACCClauseKind::NumWorkers; - SemaRef.Diag((*Itr)->getBeginLoc(), - diag::note_acc_previous_clause_here); + break; + case OpenACCDirectiveKind::ParallelLoop: + case OpenACCDirectiveKind::SerialLoop: + case OpenACCDirectiveKind::Routine: + DiagIntArgInvalid(SemaRef, IntExpr, OpenACCGangKind::Num, + OpenACCClauseKind::Worker, Clause.getDirectiveKind(), + SemaRef.getActiveComputeConstructInfo().Kind); + IntExpr = nullptr; + break; + case OpenACCDirectiveKind::KernelsLoop: { + const auto *Itr = llvm::find_if(ExistingClauses, + llvm::IsaPred); + if (Itr != 
ExistingClauses.end()) { + SemaRef.Diag(IntExpr->getBeginLoc(), diag::err_acc_num_arg_conflict) + << "num" << OpenACCClauseKind::Worker << Clause.getDirectiveKind() + << HasAssocKind(Clause.getDirectiveKind(), + SemaRef.getActiveComputeConstructInfo().Kind) + << SemaRef.getActiveComputeConstructInfo().Kind + << OpenACCClauseKind::NumWorkers; + SemaRef.Diag((*Itr)->getBeginLoc(), + diag::note_acc_previous_clause_here); - IntExpr = nullptr; - } + IntExpr = nullptr; } } + } } - if (!isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind())) { + if (Clause.getDirectiveKind() == OpenACCDirectiveKind::Loop) { // OpenACC 3.3 2.9.3: The region of a loop with a 'worker' clause may not // contain a loop with a gang or worker clause unless within a nested // compute region. @@ -1590,15 +1685,9 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitWorkerClause( OpenACCClause *SemaOpenACCClauseVisitor::VisitGangClause( SemaOpenACC::OpenACCParsedClause &Clause) { - if (DiagIfSeqClause(Clause)) + if (DiagGangWorkerVectorSeqConflict(Clause)) return nullptr; - // Restrictions only properly implemented on 'loop' constructs, and it is - // the only construct that can do anything with this, so skip/treat as - // unimplemented for the combined constructs. - if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) - return isNotImplemented(); - // OpenACC 3.3 Section 2.9.11: A reduction clause may not appear on a loop // directive that has a gang clause and is within a compute construct that has // a num_gangs clause with more than one explicit argument. @@ -1666,7 +1755,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitGangClause( IntExprs.push_back(ER.get()); } - if (!isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind())) { + if (Clause.getDirectiveKind() == OpenACCDirectiveKind::Loop) { // OpenACC 3.3 2.9.2: When the parent compute construct is a kernels // construct, the gang clause behaves as follows. ... 
The region of a loop // with a gang clause may not contain another loop with a gang clause unless @@ -1735,30 +1824,36 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitIfPresentClause( OpenACCClause *SemaOpenACCClauseVisitor::VisitSeqClause( SemaOpenACC::OpenACCParsedClause &Clause) { - // Restrictions only properly implemented on 'loop' constructs and combined , - // and it is the only construct that can do anything with this, so skip/treat - // as unimplemented for the routine constructs. - if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) - return isNotImplemented(); - // OpenACC 3.3 2.9: - // Only one of the seq, independent, and auto clauses may appear. - const auto *Itr = - llvm::find_if(ExistingClauses, - llvm::IsaPred); - if (Itr != ExistingClauses.end()) { - SemaRef.Diag(Clause.getBeginLoc(), diag::err_acc_loop_spec_conflict) - << Clause.getClauseKind() << Clause.getDirectiveKind(); - SemaRef.Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here); - return nullptr; + if (Clause.getDirectiveKind() != OpenACCDirectiveKind::Routine) { + // OpenACC 3.3 2.9: + // Only one of the seq, independent, and auto clauses may appear. + const auto *Itr = + llvm::find_if(ExistingClauses, + llvm::IsaPred); + if (Itr != ExistingClauses.end()) { + SemaRef.Diag(Clause.getBeginLoc(), diag::err_acc_loop_spec_conflict) + << Clause.getClauseKind() << Clause.getDirectiveKind(); + SemaRef.Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here); + return nullptr; + } } // OpenACC 3.3 2.9: // A 'gang', 'worker', or 'vector' clause may not appear if a 'seq' clause // appears. - Itr = llvm::find_if(ExistingClauses, - llvm::IsaPred); + // -also- + // OpenACC3.3 2.15: (routine) + // Exactly one of the 'gang', 'worker', 'vector' or 'seq' clauses must appear. + const auto *Itr = + Clause.getDirectiveKind() == OpenACCDirectiveKind::Routine + ? 
llvm::find_if(ExistingClauses, + llvm::IsaPred) + : llvm::find_if(ExistingClauses, + llvm::IsaPred); if (Itr != ExistingClauses.end()) { SemaRef.Diag(Clause.getBeginLoc(), diag::err_acc_clause_cannot_combine) @@ -2143,6 +2238,9 @@ SemaOpenACC::CheckGangExpr(ArrayRef ExistingClauses, case OpenACCDirectiveKind::KernelsLoop: return CheckGangKernelsExpr(*this, ExistingClauses, DK, ActiveComputeConstructInfo.Kind, GK, E); + case OpenACCDirectiveKind::Routine: + return CheckGangRoutineExpr(*this, DK, ActiveComputeConstructInfo.Kind, GK, + E); case OpenACCDirectiveKind::Loop: switch (ActiveComputeConstructInfo.Kind) { case OpenACCDirectiveKind::Invalid: @@ -2162,8 +2260,6 @@ SemaOpenACC::CheckGangExpr(ArrayRef ExistingClauses, llvm_unreachable("Non compute construct in active compute construct?"); } default: - // TODO: OpenACC: when we implement this on 'routine', we'll have to - // implement its checking here. llvm_unreachable("Invalid directive kind for a Gang clause"); } llvm_unreachable("Compute construct directive not handled?"); @@ -2175,31 +2271,34 @@ SemaOpenACC::CheckGangClause(OpenACCDirectiveKind DirKind, SourceLocation BeginLoc, SourceLocation LParenLoc, ArrayRef GangKinds, ArrayRef IntExprs, SourceLocation EndLoc) { - // OpenACC 3.3 2.9.11: A reduction clause may not appear on a loop directive - // that has a gang clause with a dim: argument whose value is greater than 1. 
- - const auto *ReductionItr = - llvm::find_if(ExistingClauses, llvm::IsaPred); - - if (ReductionItr != ExistingClauses.end()) { - const auto GangZip = llvm::zip_equal(GangKinds, IntExprs); - const auto GangItr = llvm::find_if(GangZip, [](const auto &Tuple) { - return std::get<0>(Tuple) == OpenACCGangKind::Dim; - }); - - if (GangItr != GangZip.end()) { - const Expr *DimExpr = std::get<1>(*GangItr); - - assert( - (DimExpr->isInstantiationDependent() || isa(DimExpr)) && - "Improperly formed gang argument"); - if (const auto *DimVal = dyn_cast(DimExpr); - DimVal && DimVal->getResultAsAPSInt() > 1) { - Diag(DimVal->getBeginLoc(), diag::err_acc_gang_reduction_conflict) - << /*gang/reduction=*/0 << DirKind; - Diag((*ReductionItr)->getBeginLoc(), - diag::note_acc_previous_clause_here); - return nullptr; + // Reduction isn't possible on 'routine' so we don't bother checking it here. + if (DirKind != OpenACCDirectiveKind::Routine) { + // OpenACC 3.3 2.9.11: A reduction clause may not appear on a loop directive + // that has a gang clause with a dim: argument whose value is greater + // than 1. 
+ const auto *ReductionItr = + llvm::find_if(ExistingClauses, llvm::IsaPred); + + if (ReductionItr != ExistingClauses.end()) { + const auto GangZip = llvm::zip_equal(GangKinds, IntExprs); + const auto GangItr = llvm::find_if(GangZip, [](const auto &Tuple) { + return std::get<0>(Tuple) == OpenACCGangKind::Dim; + }); + + if (GangItr != GangZip.end()) { + const Expr *DimExpr = std::get<1>(*GangItr); + + assert((DimExpr->isInstantiationDependent() || + isa(DimExpr)) && + "Improperly formed gang argument"); + if (const auto *DimVal = dyn_cast(DimExpr); + DimVal && DimVal->getResultAsAPSInt() > 1) { + Diag(DimVal->getBeginLoc(), diag::err_acc_gang_reduction_conflict) + << /*gang/reduction=*/0 << DirKind; + Diag((*ReductionItr)->getBeginLoc(), + diag::note_acc_previous_clause_here); + return nullptr; + } } } } @@ -2243,3 +2342,136 @@ OpenACCClause *SemaOpenACC::CheckReductionClause( getASTContext(), BeginLoc, LParenLoc, ReductionOp, Vars, EndLoc); return Ret; } + +llvm::SmallVector +SemaOpenACC::CheckLinkClauseVarList(ArrayRef VarExprs) { + const DeclContext *DC = removeLinkageSpecDC(getCurContext()); + + // Link has no special restrictions on its var list unless it is not at NS/TU + // scope. + if (isa(DC)) + return llvm::SmallVector(VarExprs); + + llvm::SmallVector NewVarList; + + for (Expr *VarExpr : VarExprs) { + if (isa(VarExpr)) { + NewVarList.push_back(VarExpr); + continue; + } + + // Field decls can't be global, nor extern, and declare can't refer to + // non-static fields in class-scope, so this always fails the scope check. + // BUT for now we add this so it gets diagnosed by the general 'declare' + // rules. 
+ if (isa(VarExpr)) { + NewVarList.push_back(VarExpr); + continue; + } + + const auto *DRE = cast(VarExpr); + const VarDecl *Var = dyn_cast(DRE->getDecl()); + + if (!Var || !Var->hasExternalStorage()) + Diag(VarExpr->getBeginLoc(), diag::err_acc_link_not_extern); + else + NewVarList.push_back(VarExpr); + } + + return NewVarList; +} +bool SemaOpenACC::CheckDeclareClause(SemaOpenACC::OpenACCParsedClause &Clause) { + + if (Clause.getDirectiveKind() != OpenACCDirectiveKind::Declare) + return false; + + const DeclContext *DC = removeLinkageSpecDC(getCurContext()); + + // Whether this is 'create', 'copyin', 'deviceptr', 'device_resident', or + // 'link', which have 2 special rules. + bool IsSpecialClause = + Clause.getClauseKind() == OpenACCClauseKind::Create || + Clause.getClauseKind() == OpenACCClauseKind::CopyIn || + Clause.getClauseKind() == OpenACCClauseKind::DevicePtr || + Clause.getClauseKind() == OpenACCClauseKind::DeviceResident || + Clause.getClauseKind() == OpenACCClauseKind::Link; + + // OpenACC 3.3 2.13: + // In C or C++ global or namespace scope, only 'create', + // 'copyin', 'deviceptr', 'device_resident', or 'link' clauses are + // allowed. + if (!IsSpecialClause && isa(DC)) { + return Diag(Clause.getBeginLoc(), diag::err_acc_declare_clause_at_global) + << Clause.getClauseKind(); + } + + llvm::SmallVector FilteredVarList; + const DeclaratorDecl *CurDecl = nullptr; + for (Expr *VarExpr : Clause.getVarList()) { + if (isa(VarExpr)) { + // There isn't really anything we can do here, so we add them anyway and + // we can check them again when we instantiate this. 
+ } else if (const auto *MemExpr = dyn_cast(VarExpr)) { + FieldDecl *FD = + cast(MemExpr->getMemberDecl()->getCanonicalDecl()); + CurDecl = FD; + + if (removeLinkageSpecDC( + FD->getLexicalDeclContext()->getPrimaryContext()) != DC) { + Diag(MemExpr->getBeginLoc(), diag::err_acc_declare_same_scope) + << Clause.getClauseKind(); + continue; + } + } else { + const auto *DRE = cast(VarExpr); + const VarDecl *Var = dyn_cast(DRE->getDecl()); + if (Var) + CurDecl = Var->getCanonicalDecl(); + + // OpenACC3.3 2.13: + // A 'declare' directive must be in the same scope as the declaration of + // any var that appears in the clauses of the directive or any scope + // within a C/C++ function. + // We can't really check 'scope' here, so we check declaration context, + // which is a reasonable approximation, but misses scopes inside of + // functions. + if (removeLinkageSpecDC(Var->getCanonicalDecl() + ->getLexicalDeclContext() + ->getPrimaryContext()) != DC) { + Diag(VarExpr->getBeginLoc(), diag::err_acc_declare_same_scope) + << Clause.getClauseKind(); + continue; + } + // OpenACC3.3 2.13: + // C and C++ extern variables may only appear in 'create', + // 'copyin', 'deviceptr', 'device_resident', or 'link' clauses on a + // 'declare' directive. + if (!IsSpecialClause && Var && Var->hasExternalStorage()) { + Diag(VarExpr->getBeginLoc(), diag::err_acc_declare_extern) + << Clause.getClauseKind(); + continue; + } + + // OpenACC3.3 2.13: + // A var may appear at most once in all the clauses of declare + // directives for a function, subroutine, program, or module. 
+ + if (CurDecl) { + auto Itr = DeclareVarReferences.find(CurDecl); + if (Itr != DeclareVarReferences.end()) { + Diag(VarExpr->getBeginLoc(), diag::err_acc_multiple_references) + << Clause.getClauseKind(); + Diag(Itr->second, diag::note_acc_previous_reference); + continue; + } else { + DeclareVarReferences[CurDecl] = VarExpr->getBeginLoc(); + } + } + } + FilteredVarList.push_back(VarExpr); + } + + Clause.setVarListDetails(FilteredVarList, Clause.isReadOnly(), + Clause.isZero()); + return false; +} diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 08586b4908dd4..d3c0534b4dd0b 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -5710,12 +5710,14 @@ TryListConversion(Sema &S, InitListExpr *From, QualType ToType, // - if the initializer list has one element that is not itself an // initializer list, the implicit conversion sequence is the one // required to convert the element to the parameter type. + // Bail out on EmbedExpr as well since we never create EmbedExpr for a + // single integer. unsigned NumInits = From->getNumInits(); - if (NumInits == 1 && !isa(From->getInit(0))) - Result = TryCopyInitialization(S, From->getInit(0), ToType, - SuppressUserConversions, - InOverloadResolution, - AllowObjCWritebackConversion); + if (NumInits == 1 && !isa(From->getInit(0)) && + !isa(From->getInit(0))) + Result = TryCopyInitialization( + S, From->getInit(0), ToType, SuppressUserConversions, + InOverloadResolution, AllowObjCWritebackConversion); // - if the initializer list has no elements, the implicit conversion // sequence is the identity conversion. else if (NumInits == 0) { @@ -10427,7 +10429,8 @@ getMorePartialOrderingConstrained(Sema &S, FunctionDecl *Fn1, FunctionDecl *Fn2, /// candidate is a better candidate than the second (C++ 13.3.3p1). 
bool clang::isBetterOverloadCandidate( Sema &S, const OverloadCandidate &Cand1, const OverloadCandidate &Cand2, - SourceLocation Loc, OverloadCandidateSet::CandidateSetKind Kind) { + SourceLocation Loc, OverloadCandidateSet::CandidateSetKind Kind, + bool PartialOverloading) { // Define viable functions to be better candidates than non-viable // functions. if (!Cand2.Viable) @@ -10664,7 +10667,7 @@ bool clang::isBetterOverloadCandidate( : QualType{}, Obj2Context ? QualType(Obj2Context->getTypeForDecl(), 0) : QualType{}, - Cand1.isReversed() ^ Cand2.isReversed())) { + Cand1.isReversed() ^ Cand2.isReversed(), PartialOverloading)) { return BetterTemplate == Cand1.Function->getPrimaryTemplate(); } } diff --git a/clang/lib/Sema/SemaRISCV.cpp b/clang/lib/Sema/SemaRISCV.cpp index 8a5037d045125..47660319fa3af 100644 --- a/clang/lib/Sema/SemaRISCV.cpp +++ b/clang/lib/Sema/SemaRISCV.cpp @@ -12,6 +12,7 @@ #include "clang/Sema/SemaRISCV.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/TargetBuiltins.h" @@ -1475,6 +1476,26 @@ void SemaRISCV::handleInterruptAttr(Decl *D, const ParsedAttr &AL) { return; } + switch (Kind) { + default: + break; + case RISCVInterruptAttr::InterruptType::qcinest: + case RISCVInterruptAttr::InterruptType::qcinonest: { + const TargetInfo &TI = getASTContext().getTargetInfo(); + llvm::StringMap FunctionFeatureMap; + getASTContext().getFunctionFeatureMap(FunctionFeatureMap, + dyn_cast(D)); + + if (!TI.hasFeature("experimental-xqciint") && + !FunctionFeatureMap.lookup("experimental-xqciint")) { + Diag(AL.getLoc(), diag::err_riscv_attribute_interrupt_requires_extension) + << Str << "Xqciint"; + return; + } + break; + } + }; + D->addAttr(::new (getASTContext()) RISCVInterruptAttr(getASTContext(), AL, Kind)); } diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index d0b713f074c33..0a193b5299bcc 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ 
b/clang/lib/Sema/SemaStmt.cpp @@ -2269,10 +2269,11 @@ StmtResult Sema::ActOnForStmt(SourceLocation ForLoc, SourceLocation LParenLoc, for (auto *DI : DS->decls()) { if (VarDecl *VD = dyn_cast(DI)) { VarDeclSeen = true; - if (VD->isLocalVarDecl() && !VD->hasLocalStorage()) { - Diag(DI->getLocation(), diag::err_non_local_variable_decl_in_for); - DI->setInvalidDecl(); - } + if (VD->isLocalVarDecl() && !VD->hasLocalStorage()) + Diag(DI->getLocation(), + getLangOpts().C23 + ? diag::warn_c17_non_local_variable_decl_in_for + : diag::ext_c23_non_local_variable_decl_in_for); } else if (!NonVarSeen) { // Keep track of the first non-variable declaration we saw so that // we can diagnose if we don't see any variable declarations. This @@ -2284,7 +2285,9 @@ StmtResult Sema::ActOnForStmt(SourceLocation ForLoc, SourceLocation LParenLoc, // Diagnose if we saw a non-variable declaration but no variable // declarations. if (NonVarSeen && !VarDeclSeen) - Diag(NonVarSeen->getLocation(), diag::err_non_variable_decl_in_for); + Diag(NonVarSeen->getLocation(), + getLangOpts().C23 ? diag::warn_c17_non_variable_decl_in_for + : diag::ext_c23_non_variable_decl_in_for); } } diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 38fa3ff3ab5b4..0caabc6573361 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -869,9 +869,11 @@ void Sema::DiagnoseTemplateParameterShadow(SourceLocation Loc, Decl *PrevDecl, ? diag::ext_template_param_shadow : (SupportedForCompatibility ? diag::ext_compat_template_param_shadow : diag::err_template_param_shadow); - const auto *ND = cast(PrevDecl); + auto *ND = cast(PrevDecl); + CheckTemplateParameterRAII CTP(*this, ND); + // FIXME: Don't put the name in the diagnostic, unless there is no source + // location. 
Diag(Loc, DiagId) << ND->getDeclName(); - NoteTemplateParameterLocation(*ND); } TemplateDecl *Sema::AdjustDeclIfTemplate(Decl *&D) { @@ -4824,7 +4826,7 @@ TemplateNameKind Sema::ActOnTemplateName(Scope *S, } bool Sema::CheckTemplateTypeArgument( - TemplateTypeParmDecl *Param, TemplateArgumentLoc &AL, + TemplateArgumentLoc &AL, SmallVectorImpl &SugaredConverted, SmallVectorImpl &CanonicalConverted) { const TemplateArgument &Arg = AL.getArgument(); @@ -4880,7 +4882,6 @@ bool Sema::CheckTemplateTypeArgument( ? diag::ext_ms_template_type_arg_missing_typename : diag::err_template_arg_must_be_type_suggest) << FixItHint::CreateInsertion(Loc, "typename "); - NoteTemplateParameterLocation(*Param); // Recover by synthesizing a type using the location information that we // already have. @@ -4918,7 +4919,6 @@ bool Sema::CheckTemplateTypeArgument( // is not a type. SourceRange SR = AL.getSourceRange(); Diag(SR.getBegin(), diag::err_template_arg_must_be_type) << SR; - NoteTemplateParameterLocation(*Param); return true; } @@ -5208,8 +5208,8 @@ bool Sema::CheckTemplateArgument(NamedDecl *Param, TemplateArgumentLoc &ArgLoc, CheckTemplateArgumentInfo &CTAI, CheckTemplateArgumentKind CTAK) { // Check template type parameters. - if (TemplateTypeParmDecl *TTP = dyn_cast(Param)) - return CheckTemplateTypeArgument(TTP, ArgLoc, CTAI.SugaredConverted, + if (isa(Param)) + return CheckTemplateTypeArgument(ArgLoc, CTAI.SugaredConverted, CTAI.CanonicalConverted); const TemplateArgument &Arg = ArgLoc.getArgument(); @@ -5354,8 +5354,6 @@ bool Sema::CheckTemplateArgument(NamedDecl *Param, TemplateArgumentLoc &ArgLoc, // therefore cannot be a non-type template argument. 
Diag(ArgLoc.getLocation(), diag::err_template_arg_must_be_expr) << ArgLoc.getSourceRange(); - NoteTemplateParameterLocation(*Param); - return true; case TemplateArgument::Type: { @@ -5375,7 +5373,6 @@ bool Sema::CheckTemplateArgument(NamedDecl *Param, TemplateArgumentLoc &ArgLoc, Diag(SR.getBegin(), diag::err_template_arg_nontype_ambig) << SR << T; else Diag(SR.getBegin(), diag::err_template_arg_must_be_expr) << SR; - NoteTemplateParameterLocation(*Param); return true; } @@ -5466,11 +5463,11 @@ bool Sema::CheckTemplateArgument(NamedDecl *Param, TemplateArgumentLoc &ArgLoc, } /// Diagnose a missing template argument. -template +template static bool diagnoseMissingArgument(Sema &S, SourceLocation Loc, - TemplateDecl *TD, - const TemplateParmDecl *D, - TemplateArgumentListInfo &Args) { + TemplateDecl *TD, const TemplateParmDecl *D, + TemplateArgumentListInfo &Args, + bool MatchingTTP) { // Dig out the most recent declaration of the template parameter; there may be // declarations of the template that are more recent than TD. D = cast(cast(TD->getMostRecentDecl()) @@ -5488,16 +5485,12 @@ static bool diagnoseMissingArgument(Sema &S, SourceLocation Loc, return true; } + SourceLocation DiagLoc = Args.getRAngleLoc(); // FIXME: If there's a more recent default argument that *is* visible, // diagnose that it was declared too late. - - TemplateParameterList *Params = TD->getTemplateParameters(); - - S.Diag(Loc, diag::err_template_arg_list_different_arity) - << /*not enough args*/0 - << (int)S.getTemplateNameKindForDiagnostics(TemplateName(TD)) - << TD; - S.NoteTemplateLocation(*TD, Params->getSourceRange()); + S.Diag(DiagLoc.isValid() ? DiagLoc : Loc, + MatchingTTP ? 
diag::err_template_template_param_missing_param + : diag::err_template_param_missing_arg); return true; } @@ -5536,6 +5529,8 @@ bool Sema::CheckTemplateArgumentList( Param = ParamBegin; Param != ParamEnd; /* increment in loop */) { + CheckTemplateParameterRAII CTP1(*this, *Param); + if (size_t ParamIdx = Param - ParamBegin; DefaultArgs && ParamIdx >= DefaultArgs.StartPos) { // All written arguments should have been consumed by this point. @@ -5572,11 +5567,9 @@ bool Sema::CheckTemplateArgumentList( continue; } else if (ArgIdx == NumArgs && !PartialTemplateArgs) { // Not enough arguments for this parameter pack. - Diag(TemplateLoc, diag::err_template_arg_list_different_arity) - << /*not enough args*/0 - << (int)getTemplateNameKindForDiagnostics(TemplateName(Template)) - << Template; - NoteTemplateLocation(*Template, Params->getSourceRange()); + Diag(RAngleLoc, CTAI.MatchingTTP + ? diag::err_template_template_param_missing_param + : diag::err_template_param_missing_arg); return true; } } @@ -5589,8 +5582,10 @@ bool Sema::CheckTemplateArgumentList( if (ArgIsExpansion && CTAI.MatchingTTP) { SmallVector Args(ParamEnd - Param); + CTP1.Clear(); // Will continue processing parameters below. for (TemplateParameterList::iterator First = Param; Param != ParamEnd; ++Param) { + CheckTemplateParameterRAII CTP2(*this, *Param); TemplateArgument &Arg = Args[Param - First]; Arg = ArgLoc.getArgument(); if (!(*Param)->isTemplateParameterPack() || @@ -5631,7 +5626,6 @@ bool Sema::CheckTemplateArgumentList( diag::err_template_expansion_into_fixed_list) << (isa(Template) ? 
1 : 0) << ArgLoc.getSourceRange(); - NoteTemplateParameterLocation(**Param); return true; } } @@ -5738,14 +5732,14 @@ bool Sema::CheckTemplateArgumentList( if (!HasDefaultArg) { if (TemplateTypeParmDecl *TTP = dyn_cast(*Param)) return diagnoseMissingArgument(*this, TemplateLoc, Template, TTP, - NewArgs); + NewArgs, CTAI.MatchingTTP); if (NonTypeTemplateParmDecl *NTTP = dyn_cast(*Param)) return diagnoseMissingArgument(*this, TemplateLoc, Template, NTTP, - NewArgs); + NewArgs, CTAI.MatchingTTP); return diagnoseMissingArgument(*this, TemplateLoc, Template, cast(*Param), - NewArgs); + NewArgs, CTAI.MatchingTTP); } return true; } @@ -5801,8 +5795,7 @@ bool Sema::CheckTemplateArgumentList( // If we have any leftover arguments, then there were too many arguments. // Complain and fail. if (ArgIdx < NumArgs) { - Diag(TemplateLoc, diag::err_template_arg_list_different_arity) - << /*too many args*/1 + Diag(TemplateLoc, diag::err_template_too_many_args) << (int)getTemplateNameKindForDiagnostics(TemplateName(Template)) << Template << SourceRange(NewArgs[ArgIdx].getLocation(), NewArgs.getRAngleLoc()); @@ -6227,8 +6220,6 @@ isNullPointerValueTemplateArgument(Sema &S, NonTypeTemplateParmDecl *Param, << Arg->getType() << Arg->getSourceRange(); for (unsigned I = 0, N = Notes.size(); I != N; ++I) S.Diag(Notes[I].first, Notes[I].second); - - S.NoteTemplateParameterLocation(*Param); return NPV_Error; } @@ -6253,8 +6244,7 @@ isNullPointerValueTemplateArgument(Sema &S, NonTypeTemplateParmDecl *Param, // The types didn't match, but we know we got a null pointer; complain, // then recover as if the types were correct. 
S.Diag(Arg->getExprLoc(), diag::err_template_arg_wrongtype_null_constant) - << Arg->getType() << ParamType << Arg->getSourceRange(); - S.NoteTemplateParameterLocation(*Param); + << Arg->getType() << ParamType << Arg->getSourceRange(); return NPV_NullPointer; } @@ -6263,8 +6253,7 @@ isNullPointerValueTemplateArgument(Sema &S, NonTypeTemplateParmDecl *Param, // We could just return NPV_NotNullPointer, but we can print a better // message with the information we have here. S.Diag(Arg->getExprLoc(), diag::err_template_arg_invalid) - << EvalResult.Val.getAsString(S.Context, ParamType); - S.NoteTemplateParameterLocation(*Param); + << EvalResult.Val.getAsString(S.Context, ParamType); return NPV_Error; } @@ -6276,7 +6265,6 @@ isNullPointerValueTemplateArgument(Sema &S, NonTypeTemplateParmDecl *Param, << ParamType << FixItHint::CreateInsertion(Arg->getBeginLoc(), Code) << FixItHint::CreateInsertion(S.getLocForEndOfToken(Arg->getEndLoc()), ")"); - S.NoteTemplateParameterLocation(*Param); return NPV_NullPointer; } @@ -6317,7 +6305,6 @@ static bool CheckTemplateArgumentIsCompatibleWithParameter( S.Diag(Arg->getBeginLoc(), diag::err_template_arg_ref_bind_ignores_quals) << ParamType << Arg->getType() << Arg->getSourceRange(); - S.NoteTemplateParameterLocation(*Param); return true; } } @@ -6335,7 +6322,6 @@ static bool CheckTemplateArgumentIsCompatibleWithParameter( else S.Diag(Arg->getBeginLoc(), diag::err_template_arg_not_convertible) << ArgIn->getType() << ParamType << Arg->getSourceRange(); - S.NoteTemplateParameterLocation(*Param); return true; } } @@ -6478,7 +6464,6 @@ static bool CheckTemplateArgumentAddressOfObjectOrFunction( if (!Entity) { S.Diag(Arg->getBeginLoc(), diag::err_template_arg_not_decl_ref) << Arg->getSourceRange(); - S.NoteTemplateParameterLocation(*Param); return true; } @@ -6486,7 +6471,6 @@ static bool CheckTemplateArgumentAddressOfObjectOrFunction( if (isa(Entity) || isa(Entity)) { S.Diag(Arg->getBeginLoc(), diag::err_template_arg_field) << Entity << 
Arg->getSourceRange(); - S.NoteTemplateParameterLocation(*Param); return true; } @@ -6495,7 +6479,6 @@ static bool CheckTemplateArgumentAddressOfObjectOrFunction( if (!Method->isStatic()) { S.Diag(Arg->getBeginLoc(), diag::err_template_arg_method) << Method << Arg->getSourceRange(); - S.NoteTemplateParameterLocation(*Param); return true; } } @@ -6535,7 +6518,6 @@ static bool CheckTemplateArgumentAddressOfObjectOrFunction( if (Var->getType()->isReferenceType()) { S.Diag(Arg->getBeginLoc(), diag::err_template_arg_reference_var) << Var->getType() << Arg->getSourceRange(); - S.NoteTemplateParameterLocation(*Param); return true; } @@ -6555,15 +6537,12 @@ static bool CheckTemplateArgumentAddressOfObjectOrFunction( if (!S.Context.hasSameUnqualifiedType(Entity->getType(), ParamType.getNonReferenceType())) { S.Diag(AddrOpLoc, diag::err_template_arg_address_of_non_pointer) - << ParamType; - S.NoteTemplateParameterLocation(*Param); + << ParamType; return true; } S.Diag(AddrOpLoc, diag::err_template_arg_address_of_non_pointer) - << ParamType - << FixItHint::CreateRemoval(AddrOpLoc); - S.NoteTemplateParameterLocation(*Param); + << ParamType << FixItHint::CreateRemoval(AddrOpLoc); ArgType = Entity->getType(); } @@ -6584,15 +6563,12 @@ static bool CheckTemplateArgumentAddressOfObjectOrFunction( ArgType = S.Context.getPointerType(Entity->getType()); if (!S.Context.hasSameUnqualifiedType(ArgType, ParamType)) { S.Diag(Arg->getBeginLoc(), diag::err_template_arg_not_address_of) - << ParamType; - S.NoteTemplateParameterLocation(*Param); + << ParamType; return true; } S.Diag(Arg->getBeginLoc(), diag::err_template_arg_not_address_of) - << ParamType << FixItHint::CreateInsertion(Arg->getBeginLoc(), "&"); - - S.NoteTemplateParameterLocation(*Param); + << ParamType << FixItHint::CreateInsertion(Arg->getBeginLoc(), "&"); } } @@ -6708,7 +6684,6 @@ CheckTemplateArgumentPointerToMember(Sema &S, NonTypeTemplateParmDecl *Param, // We can't perform this conversion. 
S.Diag(ResultArg->getBeginLoc(), diag::err_template_arg_not_convertible) << ResultArg->getType() << ParamType << ResultArg->getSourceRange(); - S.NoteTemplateParameterLocation(*Param); return true; } @@ -6814,7 +6789,6 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, diag::err_non_type_template_parm_type_deduction_failure) << Param->getDeclName() << Param->getType() << Arg->getType() << Arg->getSourceRange(); - NoteTemplateParameterLocation(*Param); return ExprError(); } } @@ -6823,10 +6797,8 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, // declaration, but here we'll pass the argument location because that's // where the parameter type is deduced. ParamType = CheckNonTypeTemplateParameterType(ParamType, Arg->getExprLoc()); - if (ParamType.isNull()) { - NoteTemplateParameterLocation(*Param); + if (ParamType.isNull()) return ExprError(); - } } // We should have already dropped all cv-qualifiers by now. @@ -6858,9 +6830,7 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, // not the type of the template argument deduced from A, against the // template parameter type. Diag(StartLoc, diag::err_deduced_non_type_template_arg_type_mismatch) - << Arg->getType() - << ParamType.getUnqualifiedType(); - NoteTemplateParameterLocation(*Param); + << Arg->getType() << ParamType.getUnqualifiedType(); return ExprError(); } @@ -6955,10 +6925,8 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, Arg, ParamType, PartialOrderingTTP ? 
CCEK_InjectedTTP : CCEK_TemplateArg, Param); assert(!ArgResult.isUnset()); - if (ArgResult.isInvalid()) { - NoteTemplateParameterLocation(*Param); + if (ArgResult.isInvalid()) return ExprError(); - } } else { ArgResult = Arg; } @@ -7105,7 +7073,6 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, if (!ArgType->isIntegralOrEnumerationType()) { Diag(Arg->getBeginLoc(), diag::err_template_arg_not_integral_or_enumeral) << ArgType << Arg->getSourceRange(); - NoteTemplateParameterLocation(*Param); return ExprError(); } else if (!Arg->isValueDependent()) { class TmplArgICEDiagnoser : public VerifyICEDiagnoser { @@ -7143,7 +7110,6 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, // We can't perform this conversion. Diag(Arg->getBeginLoc(), diag::err_template_arg_not_convertible) << Arg->getType() << ParamType << Arg->getSourceRange(); - NoteTemplateParameterLocation(*Param); return ExprError(); } @@ -7189,7 +7155,6 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, Diag(Arg->getBeginLoc(), diag::warn_template_arg_negative) << toString(OldValue, 10) << toString(Value, 10) << Param->getType() << Arg->getSourceRange(); - NoteTemplateParameterLocation(*Param); } // Complain if we overflowed the template parameter's type. @@ -7200,12 +7165,10 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, RequiredBits = OldValue.getActiveBits() + 1; else RequiredBits = OldValue.getSignificantBits(); - if (RequiredBits > AllowedBits) { + if (RequiredBits > AllowedBits) Diag(Arg->getBeginLoc(), diag::warn_template_arg_too_large) << toString(OldValue, 10) << toString(Value, 10) << Param->getType() << Arg->getSourceRange(); - NoteTemplateParameterLocation(*Param); - } } QualType T = ParamType->isEnumeralType() ? 
ParamType : IntegerType; @@ -7330,8 +7293,7 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, switch (isNullPointerValueTemplateArgument(*this, Param, ParamType, Arg)) { case NPV_NotNullPointer: Diag(Arg->getExprLoc(), diag::err_template_arg_not_convertible) - << Arg->getType() << ParamType; - NoteTemplateParameterLocation(*Param); + << Arg->getType() << ParamType; return ExprError(); case NPV_Error: @@ -7359,7 +7321,7 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, static void DiagnoseTemplateParameterListArityMismatch( Sema &S, TemplateParameterList *New, TemplateParameterList *Old, - Sema::TemplateParameterListEqualKind Kind, SourceLocation TemplateArgLoc); + Sema::TemplateParameterListEqualKind Kind); bool Sema::CheckTemplateTemplateArgument(TemplateTemplateParmDecl *Param, TemplateParameterList *Params, @@ -7429,7 +7391,6 @@ bool Sema::CheckTemplateTemplateArgument(TemplateTemplateParmDecl *Param, Diag(Arg.getLocation(), diag::err_template_template_parameter_not_at_least_as_constrained) << Template << Param << Arg.getSourceRange(); - Diag(Param->getLocation(), diag::note_entity_declared_at) << Param; Diag(Template->getLocation(), diag::note_entity_declared_at) << Template; MaybeEmitAmbiguousAtomicConstraintsDiagnostic(Param, ParamsAC, Template, TemplateAC); @@ -7438,25 +7399,24 @@ bool Sema::CheckTemplateTemplateArgument(TemplateTemplateParmDecl *Param, return false; } -static Sema::SemaDiagnosticBuilder noteLocation(Sema &S, const NamedDecl &Decl, - unsigned HereDiagID, - unsigned ExternalDiagID) { - if (Decl.getLocation().isValid()) - return S.Diag(Decl.getLocation(), HereDiagID); - +SmallString<128> Sema::toTerseString(const NamedDecl &D) const { SmallString<128> Str; llvm::raw_svector_ostream Out(Str); - PrintingPolicy PP = S.getPrintingPolicy(); + PrintingPolicy PP = getPrintingPolicy(); PP.TerseOutput = 1; - Decl.print(Out, PP); - return S.Diag(Decl.getLocation(), ExternalDiagID) << Out.str(); + 
D.print(Out, PP); + return Str; } +// FIXME: Transform this into a context note. void Sema::NoteTemplateLocation(const NamedDecl &Decl, std::optional ParamRange) { + bool HasLoc = Decl.getLocation().isValid(); SemaDiagnosticBuilder DB = - noteLocation(*this, Decl, diag::note_template_decl_here, - diag::note_template_decl_external); + Diag(Decl.getLocation(), HasLoc ? diag::note_template_decl_here + : diag::note_template_decl_external); + if (!HasLoc) + DB << toTerseString(Decl).str(); if (ParamRange && ParamRange->isValid()) { assert(Decl.getLocation().isValid() && "Parameter range has location when Decl does not"); @@ -7464,11 +7424,6 @@ void Sema::NoteTemplateLocation(const NamedDecl &Decl, } } -void Sema::NoteTemplateParameterLocation(const NamedDecl &Decl) { - noteLocation(*this, Decl, diag::note_template_param_here, - diag::note_template_param_external); -} - ExprResult Sema::BuildExpressionFromDeclTemplateArgument( const TemplateArgument &Arg, QualType ParamType, SourceLocation Loc, NamedDecl *TemplateParam) { @@ -7745,21 +7700,17 @@ Sema::BuildExpressionFromNonTypeTemplateArgument(const TemplateArgument &Arg, } /// Match two template parameters within template parameter lists. -static bool MatchTemplateParameterKind( - Sema &S, NamedDecl *New, - const Sema::TemplateCompareNewDeclInfo &NewInstFrom, NamedDecl *Old, - const NamedDecl *OldInstFrom, bool Complain, - Sema::TemplateParameterListEqualKind Kind, SourceLocation TemplateArgLoc) { +static bool +MatchTemplateParameterKind(Sema &S, NamedDecl *New, + const Sema::TemplateCompareNewDeclInfo &NewInstFrom, + NamedDecl *Old, const NamedDecl *OldInstFrom, + bool Complain, + Sema::TemplateParameterListEqualKind Kind) { // Check the actual kind (type, non-type, template). 
if (Old->getKind() != New->getKind()) { if (Complain) { - unsigned NextDiag = diag::err_template_param_different_kind; - if (TemplateArgLoc.isValid()) { - S.Diag(TemplateArgLoc, diag::err_template_arg_template_params_mismatch); - NextDiag = diag::note_template_param_different_kind; - } - S.Diag(New->getLocation(), NextDiag) - << (Kind != Sema::TPL_TemplateMatch); + S.Diag(New->getLocation(), diag::err_template_param_different_kind) + << (Kind != Sema::TPL_TemplateMatch); S.Diag(Old->getLocation(), diag::note_template_prev_declaration) << (Kind != Sema::TPL_TemplateMatch); } @@ -7773,18 +7724,11 @@ static bool MatchTemplateParameterKind( // a parameter pack where the template template argument does not. if (Old->isTemplateParameterPack() != New->isTemplateParameterPack()) { if (Complain) { - unsigned NextDiag = diag::err_template_parameter_pack_non_pack; - if (TemplateArgLoc.isValid()) { - S.Diag(TemplateArgLoc, - diag::err_template_arg_template_params_mismatch); - NextDiag = diag::note_template_parameter_pack_non_pack; - } - unsigned ParamKind = isa(New)? 0 : isa(New)? 
1 : 2; - S.Diag(New->getLocation(), NextDiag) - << ParamKind << New->isParameterPack(); + S.Diag(New->getLocation(), diag::err_template_parameter_pack_non_pack) + << ParamKind << New->isParameterPack(); S.Diag(Old->getLocation(), diag::note_template_parameter_pack_here) << ParamKind << Old->isParameterPack(); } @@ -7805,13 +7749,8 @@ static bool MatchTemplateParameterKind( QualType NewType = S.Context.getUnconstrainedType(NewNTTP->getType()); if (!S.Context.hasSameType(OldType, NewType)) { if (Complain) { - unsigned NextDiag = diag::err_template_nontype_parm_different_type; - if (TemplateArgLoc.isValid()) { - S.Diag(TemplateArgLoc, - diag::err_template_arg_template_params_mismatch); - NextDiag = diag::note_template_nontype_parm_different_type; - } - S.Diag(NewNTTP->getLocation(), NextDiag) + S.Diag(NewNTTP->getLocation(), + diag::err_template_nontype_parm_different_type) << NewNTTP->getType() << (Kind != Sema::TPL_TemplateMatch); S.Diag(OldNTTP->getLocation(), diag::note_template_nontype_parm_prev_declaration) @@ -7832,8 +7771,7 @@ static bool MatchTemplateParameterKind( OldTTP->getTemplateParameters(), Complain, (Kind == Sema::TPL_TemplateMatch ? Sema::TPL_TemplateTemplateParmMatch - : Kind), - TemplateArgLoc)) + : Kind))) return false; } @@ -7884,21 +7822,12 @@ static bool MatchTemplateParameterKind( /// Diagnose a known arity mismatch when comparing template argument /// lists. 
-static -void DiagnoseTemplateParameterListArityMismatch(Sema &S, - TemplateParameterList *New, - TemplateParameterList *Old, - Sema::TemplateParameterListEqualKind Kind, - SourceLocation TemplateArgLoc) { - unsigned NextDiag = diag::err_template_param_list_different_arity; - if (TemplateArgLoc.isValid()) { - S.Diag(TemplateArgLoc, diag::err_template_arg_template_params_mismatch); - NextDiag = diag::note_template_param_list_different_arity; - } - S.Diag(New->getTemplateLoc(), NextDiag) - << (New->size() > Old->size()) - << (Kind != Sema::TPL_TemplateMatch) - << SourceRange(New->getTemplateLoc(), New->getRAngleLoc()); +static void DiagnoseTemplateParameterListArityMismatch( + Sema &S, TemplateParameterList *New, TemplateParameterList *Old, + Sema::TemplateParameterListEqualKind Kind) { + S.Diag(New->getTemplateLoc(), diag::err_template_param_list_different_arity) + << (New->size() > Old->size()) << (Kind != Sema::TPL_TemplateMatch) + << SourceRange(New->getTemplateLoc(), New->getRAngleLoc()); S.Diag(Old->getTemplateLoc(), diag::note_template_prev_declaration) << (Kind != Sema::TPL_TemplateMatch) << SourceRange(Old->getTemplateLoc(), Old->getRAngleLoc()); @@ -7907,11 +7836,10 @@ void DiagnoseTemplateParameterListArityMismatch(Sema &S, bool Sema::TemplateParameterListsAreEqual( const TemplateCompareNewDeclInfo &NewInstFrom, TemplateParameterList *New, const NamedDecl *OldInstFrom, TemplateParameterList *Old, bool Complain, - TemplateParameterListEqualKind Kind, SourceLocation TemplateArgLoc) { + TemplateParameterListEqualKind Kind) { if (Old->size() != New->size()) { if (Complain) - DiagnoseTemplateParameterListArityMismatch(*this, New, Old, Kind, - TemplateArgLoc); + DiagnoseTemplateParameterListArityMismatch(*this, New, Old, Kind); return false; } @@ -7929,21 +7857,18 @@ bool Sema::TemplateParameterListsAreEqual( OldParm != OldParmEnd; ++OldParm, ++NewParm) { if (NewParm == NewParmEnd) { if (Complain) - DiagnoseTemplateParameterListArityMismatch(*this, New, Old, 
Kind, - TemplateArgLoc); + DiagnoseTemplateParameterListArityMismatch(*this, New, Old, Kind); return false; } if (!MatchTemplateParameterKind(*this, *NewParm, NewInstFrom, *OldParm, - OldInstFrom, Complain, Kind, - TemplateArgLoc)) + OldInstFrom, Complain, Kind)) return false; } // Make sure we exhausted all of the arguments. if (NewParm != NewParmEnd) { if (Complain) - DiagnoseTemplateParameterListArityMismatch(*this, New, Old, Kind, - TemplateArgLoc); + DiagnoseTemplateParameterListArityMismatch(*this, New, Old, Kind); return false; } @@ -7991,8 +7916,11 @@ Sema::CheckTemplateDeclScope(Scope *S, TemplateParameterList *TemplateParams) { // have C linkage. DeclContext *Ctx = S->getEntity(); if (Ctx && Ctx->isExternCContext()) { - Diag(TemplateParams->getTemplateLoc(), diag::err_template_linkage) - << TemplateParams->getSourceRange(); + SourceRange Range = + TemplateParams->getTemplateLoc().isInvalid() && TemplateParams->size() + ? TemplateParams->getParam(0)->getSourceRange() + : TemplateParams->getSourceRange(); + Diag(Range.getBegin(), diag::err_template_linkage) << Range; if (const LinkageSpecDecl *LSD = Ctx->getExternCContext()) Diag(LSD->getExternLoc(), diag::note_extern_c_begins_here); return true; @@ -8238,7 +8166,6 @@ static bool CheckNonTypeTemplatePartialSpecializationArgs( S.Diag(IsDefaultArgument ? TemplateNameLoc : ArgExpr->getBeginLoc(), diag::err_dependent_typed_non_type_arg_in_partial_spec) << Param->getType(); - S.NoteTemplateParameterLocation(*Param); return true; } } @@ -8262,6 +8189,7 @@ bool Sema::CheckTemplatePartialSpecializationArgs( if (!Param) continue; + CheckTemplateParameterRAII CTP(*this, Param); if (CheckNonTypeTemplatePartialSpecializationArgs(*this, TemplateNameLoc, Param, &TemplateArgs[I], 1, I >= NumExplicit)) @@ -10914,20 +10842,15 @@ Sema::CheckTypenameType(ElaboratedTypeKeyword Keyword, // // FIXME: That's not strictly true: mem-initializer-id lookup does not // ignore functions, but that appears to be an oversight. 
- auto *LookupRD = dyn_cast_or_null(Ctx); - auto *FoundRD = dyn_cast(Type); - if (Keyword == ElaboratedTypeKeyword::Typename && LookupRD && FoundRD && - FoundRD->isInjectedClassName() && - declaresSameEntity(LookupRD, cast(FoundRD->getParent()))) - Diag(IILoc, diag::ext_out_of_line_qualified_id_type_names_constructor) - << &II << 1 << 0 /*'typename' keyword used*/; - + QualType T = getTypeDeclType(Ctx, + Keyword == ElaboratedTypeKeyword::Typename + ? DiagCtorKind::Typename + : DiagCtorKind::None, + Type, IILoc); // We found a type. Build an ElaboratedType, since the // typename-specifier was just sugar. - MarkAnyDeclReferenced(Type->getLocation(), Type, /*OdrUse=*/false); - return Context.getElaboratedType(Keyword, - QualifierLoc.getNestedNameSpecifier(), - Context.getTypeDeclType(Type)); + return Context.getElaboratedType( + Keyword, QualifierLoc.getNestedNameSpecifier(), T); } // C++ [dcl.type.simple]p2: diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 627cd82ed1c77..b1a1f86c5cfe1 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -3006,7 +3006,7 @@ ConvertDeducedTemplateArgument(Sema &S, NamedDecl *Param, // arguments). S.Diag(Param->getLocation(), diag::err_template_arg_deduced_incomplete_pack) - << Arg << Param; + << Arg << Param; return true; } if (ConvertArg(InnerArg, SugaredPackedArgsBuilder.size())) @@ -3072,7 +3072,7 @@ static TemplateDeductionResult ConvertDeducedTemplateArguments( for (unsigned I = 0, N = TemplateParams->size(); I != N; ++I) { NamedDecl *Param = TemplateParams->getParam(I); - + Sema::CheckTemplateParameterRAII CTP(S, Param); // C++0x [temp.arg.explicit]p3: // A trailing template parameter pack (14.5.3) not otherwise deduced will // be deduced to an empty sequence of template arguments. 
@@ -4506,7 +4506,8 @@ static TemplateDeductionResult DeduceFromInitializerList( // C++ [temp.deduct.type]p13: // The type of N in the type T[N] is std::size_t. QualType T = S.Context.getSizeType(); - llvm::APInt Size(S.Context.getIntWidth(T), ILE->getNumInits()); + llvm::APInt Size(S.Context.getIntWidth(T), + ILE->getNumInitsWithEmbedExpanded()); if (auto Result = DeduceNonTypeTemplateArgument( S, TemplateParams, NTTP, llvm::APSInt(Size), T, /*ArrayBound=*/true, Info, /*PartialOrdering=*/false, Deduced, @@ -5861,10 +5862,42 @@ static bool isAtLeastAsSpecializedAs( return true; } +enum class MoreSpecializedTrailingPackTieBreakerResult { Equal, Less, More }; + +// This a speculative fix for CWG1432 (Similar to the fix for CWG1395) that +// there is no wording or even resolution for this issue. +static MoreSpecializedTrailingPackTieBreakerResult +getMoreSpecializedTrailingPackTieBreaker( + const TemplateSpecializationType *TST1, + const TemplateSpecializationType *TST2) { + ArrayRef As1 = TST1->template_arguments(), + As2 = TST2->template_arguments(); + const TemplateArgument &TA1 = As1.back(), &TA2 = As2.back(); + bool IsPack = TA1.getKind() == TemplateArgument::Pack; + assert(IsPack == (TA2.getKind() == TemplateArgument::Pack)); + if (!IsPack) + return MoreSpecializedTrailingPackTieBreakerResult::Equal; + assert(As1.size() == As2.size()); + + unsigned PackSize1 = TA1.pack_size(), PackSize2 = TA2.pack_size(); + bool IsPackExpansion1 = + PackSize1 && TA1.pack_elements().back().isPackExpansion(); + bool IsPackExpansion2 = + PackSize2 && TA2.pack_elements().back().isPackExpansion(); + if (PackSize1 == PackSize2 && IsPackExpansion1 == IsPackExpansion2) + return MoreSpecializedTrailingPackTieBreakerResult::Equal; + if (PackSize1 > PackSize2 && IsPackExpansion1) + return MoreSpecializedTrailingPackTieBreakerResult::More; + if (PackSize1 < PackSize2 && IsPackExpansion2) + return MoreSpecializedTrailingPackTieBreakerResult::Less; + return 
MoreSpecializedTrailingPackTieBreakerResult::Equal; +} + FunctionTemplateDecl *Sema::getMoreSpecializedTemplate( FunctionTemplateDecl *FT1, FunctionTemplateDecl *FT2, SourceLocation Loc, TemplatePartialOrderingContext TPOC, unsigned NumCallArguments1, - QualType RawObj1Ty, QualType RawObj2Ty, bool Reversed) { + QualType RawObj1Ty, QualType RawObj2Ty, bool Reversed, + bool PartialOverloading) { SmallVector Args1; SmallVector Args2; const FunctionDecl *FD1 = FT1->getTemplatedDecl(); @@ -6000,34 +6033,27 @@ FunctionTemplateDecl *Sema::getMoreSpecializedTemplate( return FT1; } - // This a speculative fix for CWG1432 (Similar to the fix for CWG1395) that - // there is no wording or even resolution for this issue. - for (int i = 0, e = std::min(NumParams1, NumParams2); i < e; ++i) { + // Skip this tie breaker if we are performing overload resolution with partial + // arguments, as this breaks some assumptions about how closely related the + // candidates are. + for (int i = 0, e = std::min(NumParams1, NumParams2); + !PartialOverloading && i < e; ++i) { QualType T1 = Param1[i].getCanonicalType(); QualType T2 = Param2[i].getCanonicalType(); auto *TST1 = dyn_cast(T1); auto *TST2 = dyn_cast(T2); if (!TST1 || !TST2) continue; - const TemplateArgument &TA1 = TST1->template_arguments().back(); - if (TA1.getKind() == TemplateArgument::Pack) { - assert(TST1->template_arguments().size() == - TST2->template_arguments().size()); - const TemplateArgument &TA2 = TST2->template_arguments().back(); - assert(TA2.getKind() == TemplateArgument::Pack); - unsigned PackSize1 = TA1.pack_size(); - unsigned PackSize2 = TA2.pack_size(); - bool IsPackExpansion1 = - PackSize1 && TA1.pack_elements().back().isPackExpansion(); - bool IsPackExpansion2 = - PackSize2 && TA2.pack_elements().back().isPackExpansion(); - if (PackSize1 != PackSize2 && IsPackExpansion1 != IsPackExpansion2) { - if (PackSize1 > PackSize2 && IsPackExpansion1) - return FT2; - if (PackSize1 < PackSize2 && IsPackExpansion2) - return 
FT1; - } + switch (getMoreSpecializedTrailingPackTieBreaker(TST1, TST2)) { + case MoreSpecializedTrailingPackTieBreakerResult::Less: + return FT1; + case MoreSpecializedTrailingPackTieBreakerResult::More: + return FT2; + case MoreSpecializedTrailingPackTieBreakerResult::Equal: + continue; } + llvm_unreachable( + "unknown MoreSpecializedTrailingPackTieBreakerResult value"); } if (!Context.getLangOpts().CPlusPlus20) @@ -6374,28 +6400,15 @@ getMoreSpecialized(Sema &S, QualType T1, QualType T2, TemplateLikeDecl *P1, if (!Better1 && !Better2) return nullptr; - // This a speculative fix for CWG1432 (Similar to the fix for CWG1395) that - // there is no wording or even resolution for this issue. - auto *TST1 = cast(T1); - auto *TST2 = cast(T2); - const TemplateArgument &TA1 = TST1->template_arguments().back(); - if (TA1.getKind() == TemplateArgument::Pack) { - assert(TST1->template_arguments().size() == - TST2->template_arguments().size()); - const TemplateArgument &TA2 = TST2->template_arguments().back(); - assert(TA2.getKind() == TemplateArgument::Pack); - unsigned PackSize1 = TA1.pack_size(); - unsigned PackSize2 = TA2.pack_size(); - bool IsPackExpansion1 = - PackSize1 && TA1.pack_elements().back().isPackExpansion(); - bool IsPackExpansion2 = - PackSize2 && TA2.pack_elements().back().isPackExpansion(); - if (PackSize1 != PackSize2 && IsPackExpansion1 != IsPackExpansion2) { - if (PackSize1 > PackSize2 && IsPackExpansion1) - return GetP2()(P1, P2); - if (PackSize1 < PackSize2 && IsPackExpansion2) - return P1; - } + switch (getMoreSpecializedTrailingPackTieBreaker( + cast(T1), + cast(T2))) { + case MoreSpecializedTrailingPackTieBreakerResult::Less: + return P1; + case MoreSpecializedTrailingPackTieBreakerResult::More: + return GetP2()(P1, P2); + case MoreSpecializedTrailingPackTieBreakerResult::Equal: + break; } if (!S.Context.getLangOpts().CPlusPlus20) diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 
fcb7671ed92f0..847057a3f70ea 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -575,6 +575,7 @@ bool Sema::CodeSynthesisContext::isInstantiationRecord() const { case BuildingDeductionGuides: case TypeAliasTemplateInstantiation: case PartialOrderingTTP: + case CheckTemplateParameter: return false; // This function should never be called when Kind's value is Memoization. @@ -809,7 +810,16 @@ Sema::InstantiatingTemplate::InstantiatingTemplate( Sema &SemaRef, SourceLocation ArgLoc, PartialOrderingTTP, TemplateDecl *PArg, SourceRange InstantiationRange) : InstantiatingTemplate(SemaRef, CodeSynthesisContext::PartialOrderingTTP, - ArgLoc, InstantiationRange, PArg) {} + ArgLoc, SourceRange(), PArg) {} + +Sema::InstantiatingTemplate::InstantiatingTemplate(Sema &SemaRef, + CheckTemplateParameter, + NamedDecl *Param) + : InstantiatingTemplate( + SemaRef, CodeSynthesisContext::CheckTemplateParameter, + Param->getLocation(), Param->getSourceRange(), Param) { + assert(Param->isTemplateParameter()); +} void Sema::pushCodeSynthesisContext(CodeSynthesisContext Ctx) { Ctx.SavedInNonInstantiationSFINAEContext = InNonInstantiationSFINAEContext; @@ -1251,13 +1261,19 @@ void Sema::PrintInstantiationStack(InstantiationContextDiagFuncRef DiagFunc) { case CodeSynthesisContext::PartialOrderingTTP: DiagFunc(Active->PointOfInstantiation, PDiag(diag::note_template_arg_template_params_mismatch)); - if (SourceLocation ParamLoc = Active->Entity->getLocation(); - ParamLoc.isValid()) - DiagFunc(ParamLoc, PDiag(diag::note_template_prev_declaration) - << /*isTemplateTemplateParam=*/true - << Active->InstantiationRange); + break; + case CodeSynthesisContext::CheckTemplateParameter: { + const auto &ND = *cast(Active->Entity); + if (SourceLocation Loc = ND.getLocation(); Loc.isValid()) { + DiagFunc(Loc, PDiag(diag::note_template_param_here) + << ND.getSourceRange()); + break; + } + DiagFunc(SourceLocation(), 
PDiag(diag::note_template_param_external) + << toTerseString(ND).str()); break; } + } } } @@ -1300,6 +1316,7 @@ std::optional Sema::isSFINAEContext() const { case CodeSynthesisContext::DefaultTemplateArgumentChecking: case CodeSynthesisContext::RewritingOperatorAsSpaceship: case CodeSynthesisContext::PartialOrderingTTP: + case CodeSynthesisContext::CheckTemplateParameter: // A default template argument instantiation and substitution into // template parameters with arguments for prior parameters may or may // not be a SFINAE context; look further up the stack. @@ -2348,6 +2365,7 @@ TemplateInstantiator::TransformSubstNonTypeTemplateParmPackExpr( ExprResult TemplateInstantiator::TransformSubstNonTypeTemplateParmExpr( SubstNonTypeTemplateParmExpr *E) { + Sema::CheckTemplateParameterRAII CTP(SemaRef, E->getParameter()); ExprResult SubstReplacement = E->getReplacement(); if (!isa(SubstReplacement.get())) SubstReplacement = TransformExpr(E->getReplacement()); @@ -4498,6 +4516,17 @@ Sema::SubstExpr(Expr *E, const MultiLevelTemplateArgumentList &TemplateArgs) { return Instantiator.TransformExpr(E); } +ExprResult +Sema::SubstCXXIdExpr(Expr *E, + const MultiLevelTemplateArgumentList &TemplateArgs) { + if (!E) + return E; + + TemplateInstantiator Instantiator(*this, TemplateArgs, SourceLocation(), + DeclarationName()); + return Instantiator.TransformAddressOfOperand(E); +} + ExprResult Sema::SubstConstraintExpr(Expr *E, const MultiLevelTemplateArgumentList &TemplateArgs) { diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 1cdf80898bfca..f3e33078c55c7 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -999,6 +999,321 @@ TemplateDeclInstantiator::VisitNamespaceDecl(NamespaceDecl *D) { llvm_unreachable("Namespaces cannot be instantiated"); } +namespace { +class OpenACCDeclClauseInstantiator final + : public OpenACCClauseVisitor { + Sema &SemaRef; + 
const MultiLevelTemplateArgumentList &MLTAL; + ArrayRef ExistingClauses; + SemaOpenACC::OpenACCParsedClause &ParsedClause; + OpenACCClause *NewClause = nullptr; + +public: + OpenACCDeclClauseInstantiator(Sema &S, + const MultiLevelTemplateArgumentList &MLTAL, + ArrayRef ExistingClauses, + SemaOpenACC::OpenACCParsedClause &ParsedClause) + : SemaRef(S), MLTAL(MLTAL), ExistingClauses(ExistingClauses), + ParsedClause(ParsedClause) {} + + OpenACCClause *CreatedClause() { return NewClause; } +#define VISIT_CLAUSE(CLAUSE_NAME) \ + void Visit##CLAUSE_NAME##Clause(const OpenACC##CLAUSE_NAME##Clause &Clause); +#include "clang/Basic/OpenACCClauses.def" + + llvm::SmallVector VisitVarList(ArrayRef VarList) { + llvm::SmallVector InstantiatedVarList; + for (Expr *CurVar : VarList) { + ExprResult Res = SemaRef.SubstExpr(CurVar, MLTAL); + + if (!Res.isUsable()) + continue; + + Res = SemaRef.OpenACC().ActOnVar(ParsedClause.getDirectiveKind(), + ParsedClause.getClauseKind(), Res.get()); + + if (Res.isUsable()) + InstantiatedVarList.push_back(Res.get()); + } + return InstantiatedVarList; + } +}; + +#define CLAUSE_NOT_ON_DECLS(CLAUSE_NAME) \ + void OpenACCDeclClauseInstantiator::Visit##CLAUSE_NAME##Clause( \ + const OpenACC##CLAUSE_NAME##Clause &) { \ + llvm_unreachable("Clause type invalid on declaration construct, or " \ + "instantiation not implemented"); \ + } + +CLAUSE_NOT_ON_DECLS(Auto) +CLAUSE_NOT_ON_DECLS(Async) +CLAUSE_NOT_ON_DECLS(Attach) +CLAUSE_NOT_ON_DECLS(Collapse) +CLAUSE_NOT_ON_DECLS(Default) +CLAUSE_NOT_ON_DECLS(DefaultAsync) +CLAUSE_NOT_ON_DECLS(Delete) +CLAUSE_NOT_ON_DECLS(Detach) +CLAUSE_NOT_ON_DECLS(Device) +CLAUSE_NOT_ON_DECLS(DeviceNum) +CLAUSE_NOT_ON_DECLS(DeviceType) +CLAUSE_NOT_ON_DECLS(Finalize) +CLAUSE_NOT_ON_DECLS(FirstPrivate) +CLAUSE_NOT_ON_DECLS(Host) +CLAUSE_NOT_ON_DECLS(If) +CLAUSE_NOT_ON_DECLS(IfPresent) +CLAUSE_NOT_ON_DECLS(Independent) +CLAUSE_NOT_ON_DECLS(NoCreate) +CLAUSE_NOT_ON_DECLS(NumGangs) +CLAUSE_NOT_ON_DECLS(NumWorkers) 
+CLAUSE_NOT_ON_DECLS(Private) +CLAUSE_NOT_ON_DECLS(Reduction) +CLAUSE_NOT_ON_DECLS(Self) +CLAUSE_NOT_ON_DECLS(Tile) +CLAUSE_NOT_ON_DECLS(UseDevice) +CLAUSE_NOT_ON_DECLS(VectorLength) +CLAUSE_NOT_ON_DECLS(Wait) +#undef CLAUSE_NOT_ON_DECLS + +void OpenACCDeclClauseInstantiator::VisitGangClause( + const OpenACCGangClause &C) { + llvm::SmallVector TransformedGangKinds; + llvm::SmallVector TransformedIntExprs; + assert(C.getNumExprs() <= 1 && + "Only 1 expression allowed on gang clause in routine"); + + if (C.getNumExprs() > 0) { + assert(C.getExpr(0).first == OpenACCGangKind::Dim && + "Only dim allowed on routine"); + ExprResult ER = + SemaRef.SubstExpr(const_cast(C.getExpr(0).second), MLTAL); + if (ER.isUsable()) { + ER = SemaRef.OpenACC().CheckGangExpr(ExistingClauses, + ParsedClause.getDirectiveKind(), + C.getExpr(0).first, ER.get()); + if (ER.isUsable()) { + TransformedGangKinds.push_back(OpenACCGangKind::Dim); + TransformedIntExprs.push_back(ER.get()); + } + } + } + + NewClause = SemaRef.OpenACC().CheckGangClause( + ParsedClause.getDirectiveKind(), ExistingClauses, + ParsedClause.getBeginLoc(), ParsedClause.getLParenLoc(), + TransformedGangKinds, TransformedIntExprs, ParsedClause.getEndLoc()); +} + +void OpenACCDeclClauseInstantiator::VisitSeqClause(const OpenACCSeqClause &C) { + NewClause = OpenACCSeqClause::Create(SemaRef.getASTContext(), + ParsedClause.getBeginLoc(), + ParsedClause.getEndLoc()); +} +void OpenACCDeclClauseInstantiator::VisitNoHostClause( + const OpenACCNoHostClause &C) { + NewClause = OpenACCNoHostClause::Create(SemaRef.getASTContext(), + ParsedClause.getBeginLoc(), + ParsedClause.getEndLoc()); +} + +void OpenACCDeclClauseInstantiator::VisitWorkerClause( + const OpenACCWorkerClause &C) { + assert(!C.hasIntExpr() && "Int Expr not allowed on routine 'worker' clause"); + NewClause = OpenACCWorkerClause::Create(SemaRef.getASTContext(), + ParsedClause.getBeginLoc(), {}, + nullptr, ParsedClause.getEndLoc()); +} + +void 
OpenACCDeclClauseInstantiator::VisitVectorClause( + const OpenACCVectorClause &C) { + assert(!C.hasIntExpr() && "Int Expr not allowed on routine 'vector' clause"); + NewClause = OpenACCVectorClause::Create(SemaRef.getASTContext(), + ParsedClause.getBeginLoc(), {}, + nullptr, ParsedClause.getEndLoc()); +} + +void OpenACCDeclClauseInstantiator::VisitCopyClause( + const OpenACCCopyClause &C) { + ParsedClause.setVarListDetails(VisitVarList(C.getVarList()), + /*IsReadOnly=*/false, /*IsZero=*/false); + if (SemaRef.OpenACC().CheckDeclareClause(ParsedClause)) + return; + NewClause = OpenACCCopyClause::Create( + SemaRef.getASTContext(), ParsedClause.getClauseKind(), + ParsedClause.getBeginLoc(), ParsedClause.getLParenLoc(), + ParsedClause.getVarList(), ParsedClause.getEndLoc()); +} + +void OpenACCDeclClauseInstantiator::VisitLinkClause( + const OpenACCLinkClause &C) { + ParsedClause.setVarListDetails( + SemaRef.OpenACC().CheckLinkClauseVarList(VisitVarList(C.getVarList())), + /*IsReadOnly=*/false, /*IsZero=*/false); + + if (SemaRef.OpenACC().CheckDeclareClause(ParsedClause)) + return; + + NewClause = OpenACCLinkClause::Create( + SemaRef.getASTContext(), ParsedClause.getBeginLoc(), + ParsedClause.getLParenLoc(), ParsedClause.getVarList(), + ParsedClause.getEndLoc()); +} + +void OpenACCDeclClauseInstantiator::VisitDeviceResidentClause( + const OpenACCDeviceResidentClause &C) { + ParsedClause.setVarListDetails(VisitVarList(C.getVarList()), + /*IsReadOnly=*/false, /*IsZero=*/false); + if (SemaRef.OpenACC().CheckDeclareClause(ParsedClause)) + return; + NewClause = OpenACCDeviceResidentClause::Create( + SemaRef.getASTContext(), ParsedClause.getBeginLoc(), + ParsedClause.getLParenLoc(), ParsedClause.getVarList(), + ParsedClause.getEndLoc()); +} + +void OpenACCDeclClauseInstantiator::VisitCopyInClause( + const OpenACCCopyInClause &C) { + ParsedClause.setVarListDetails(VisitVarList(C.getVarList()), C.isReadOnly(), + /*IsZero=*/false); + + if 
(SemaRef.OpenACC().CheckDeclareClause(ParsedClause)) + return; + NewClause = OpenACCCopyInClause::Create( + SemaRef.getASTContext(), ParsedClause.getClauseKind(), + ParsedClause.getBeginLoc(), ParsedClause.getLParenLoc(), + ParsedClause.isReadOnly(), ParsedClause.getVarList(), + ParsedClause.getEndLoc()); +} +void OpenACCDeclClauseInstantiator::VisitCopyOutClause( + const OpenACCCopyOutClause &C) { + ParsedClause.setVarListDetails(VisitVarList(C.getVarList()), + /*IsReadOnly=*/false, C.isZero()); + + if (SemaRef.OpenACC().CheckDeclareClause(ParsedClause)) + return; + NewClause = OpenACCCopyOutClause::Create( + SemaRef.getASTContext(), ParsedClause.getClauseKind(), + ParsedClause.getBeginLoc(), ParsedClause.getLParenLoc(), + ParsedClause.isZero(), ParsedClause.getVarList(), + ParsedClause.getEndLoc()); +} +void OpenACCDeclClauseInstantiator::VisitCreateClause( + const OpenACCCreateClause &C) { + ParsedClause.setVarListDetails(VisitVarList(C.getVarList()), + /*IsReadOnly=*/false, C.isZero()); + + if (SemaRef.OpenACC().CheckDeclareClause(ParsedClause)) + return; + NewClause = OpenACCCreateClause::Create( + SemaRef.getASTContext(), ParsedClause.getClauseKind(), + ParsedClause.getBeginLoc(), ParsedClause.getLParenLoc(), + ParsedClause.isZero(), ParsedClause.getVarList(), + ParsedClause.getEndLoc()); +} +void OpenACCDeclClauseInstantiator::VisitPresentClause( + const OpenACCPresentClause &C) { + ParsedClause.setVarListDetails(VisitVarList(C.getVarList()), + /*IsReadOnly=*/false, /*IsZero=*/false); + if (SemaRef.OpenACC().CheckDeclareClause(ParsedClause)) + return; + NewClause = OpenACCPresentClause::Create( + SemaRef.getASTContext(), ParsedClause.getBeginLoc(), + ParsedClause.getLParenLoc(), ParsedClause.getVarList(), + ParsedClause.getEndLoc()); +} +void OpenACCDeclClauseInstantiator::VisitDevicePtrClause( + const OpenACCDevicePtrClause &C) { + llvm::SmallVector VarList = VisitVarList(C.getVarList()); + // Ensure each var is a pointer type. 
+ VarList.erase(std::remove_if(VarList.begin(), VarList.end(), + [&](Expr *E) { + return SemaRef.OpenACC().CheckVarIsPointerType( + OpenACCClauseKind::DevicePtr, E); + }), + VarList.end()); + ParsedClause.setVarListDetails(VarList, + /*IsReadOnly=*/false, /*IsZero=*/false); + if (SemaRef.OpenACC().CheckDeclareClause(ParsedClause)) + return; + NewClause = OpenACCDevicePtrClause::Create( + SemaRef.getASTContext(), ParsedClause.getBeginLoc(), + ParsedClause.getLParenLoc(), ParsedClause.getVarList(), + ParsedClause.getEndLoc()); +} + +llvm::SmallVector InstantiateOpenACCClauseList( + Sema &S, const MultiLevelTemplateArgumentList &MLTAL, + OpenACCDirectiveKind DK, ArrayRef ClauseList) { + llvm::SmallVector TransformedClauses; + + for (const auto *Clause : ClauseList) { + SemaOpenACC::OpenACCParsedClause ParsedClause(DK, Clause->getClauseKind(), + Clause->getBeginLoc()); + ParsedClause.setEndLoc(Clause->getEndLoc()); + if (const auto *WithParms = dyn_cast(Clause)) + ParsedClause.setLParenLoc(WithParms->getLParenLoc()); + + OpenACCDeclClauseInstantiator Instantiator{S, MLTAL, TransformedClauses, + ParsedClause}; + Instantiator.Visit(Clause); + if (Instantiator.CreatedClause()) + TransformedClauses.push_back(Instantiator.CreatedClause()); + } + return TransformedClauses; +} + +} // namespace + +Decl *TemplateDeclInstantiator::VisitOpenACCDeclareDecl(OpenACCDeclareDecl *D) { + SemaRef.OpenACC().ActOnConstruct(D->getDirectiveKind(), D->getBeginLoc()); + llvm::SmallVector TransformedClauses = + InstantiateOpenACCClauseList(SemaRef, TemplateArgs, D->getDirectiveKind(), + D->clauses()); + + if (SemaRef.OpenACC().ActOnStartDeclDirective( + D->getDirectiveKind(), D->getBeginLoc(), TransformedClauses)) + return nullptr; + + DeclGroupRef Res = SemaRef.OpenACC().ActOnEndDeclDirective( + D->getDirectiveKind(), D->getBeginLoc(), D->getDirectiveLoc(), {}, + nullptr, {}, D->getEndLoc(), TransformedClauses); + + if (Res.isNull()) + return nullptr; + + return Res.getSingleDecl(); +} + 
+Decl *TemplateDeclInstantiator::VisitOpenACCRoutineDecl(OpenACCRoutineDecl *D) { + SemaRef.OpenACC().ActOnConstruct(D->getDirectiveKind(), D->getBeginLoc()); + llvm::SmallVector TransformedClauses = + InstantiateOpenACCClauseList(SemaRef, TemplateArgs, D->getDirectiveKind(), + D->clauses()); + + ExprResult FuncRef; + if (D->getFunctionReference()) { + FuncRef = SemaRef.SubstCXXIdExpr(D->getFunctionReference(), TemplateArgs); + if (FuncRef.isUsable()) + FuncRef = SemaRef.OpenACC().ActOnRoutineName(FuncRef.get()); + // We don't return early here, we leave the construct in the AST, even if + // the function decl is empty. + } + + if (SemaRef.OpenACC().ActOnStartDeclDirective( + D->getDirectiveKind(), D->getBeginLoc(), TransformedClauses)) + return nullptr; + + DeclGroupRef Res = SemaRef.OpenACC().ActOnEndDeclDirective( + D->getDirectiveKind(), D->getBeginLoc(), D->getDirectiveLoc(), + D->getLParenLoc(), FuncRef.get(), D->getRParenLoc(), D->getEndLoc(), + TransformedClauses); + + if (Res.isNull()) + return nullptr; + + return Res.getSingleDecl(); +} + Decl * TemplateDeclInstantiator::VisitNamespaceAliasDecl(NamespaceAliasDecl *D) { NamespaceAliasDecl *Inst @@ -2134,7 +2449,8 @@ Decl *TemplateDeclInstantiator::VisitCXXRecordDecl(CXXRecordDecl *D) { // DR1484 clarifies that the members of a local class are instantiated as part // of the instantiation of their enclosing entity. if (D->isCompleteDefinition() && D->isLocalClass()) { - Sema::LocalEagerInstantiationScope LocalInstantiations(SemaRef); + Sema::LocalEagerInstantiationScope LocalInstantiations(SemaRef, + /*AtEndOfTU=*/false); SemaRef.InstantiateClass(D->getLocation(), Record, D, TemplateArgs, TSK_ImplicitInstantiation, @@ -5116,8 +5432,10 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation, // This has to happen before LateTemplateParser below is called, so that // it marks vtables used in late parsed templates as used. 
GlobalEagerInstantiationScope GlobalInstantiations(*this, - /*Enabled=*/Recursive); - LocalEagerInstantiationScope LocalInstantiations(*this); + /*Enabled=*/Recursive, + /*AtEndOfTU=*/AtEndOfTU); + LocalEagerInstantiationScope LocalInstantiations(*this, + /*AtEndOfTU=*/AtEndOfTU); // Call the LateTemplateParser callback if there is a need to late parse // a templated function definition. @@ -5691,10 +6009,12 @@ void Sema::InstantiateVariableDefinition(SourceLocation PointOfInstantiation, // If we're performing recursive template instantiation, create our own // queue of pending implicit instantiations that we will instantiate // later, while we're still within our own instantiation context. - GlobalEagerInstantiationScope GlobalInstantiations(*this, - /*Enabled=*/Recursive); + GlobalEagerInstantiationScope GlobalInstantiations( + *this, + /*Enabled=*/Recursive, /*AtEndOfTU=*/AtEndOfTU); LocalInstantiationScope Local(*this); - LocalEagerInstantiationScope LocalInstantiations(*this); + LocalEagerInstantiationScope LocalInstantiations(*this, + /*AtEndOfTU=*/AtEndOfTU); // Enter the scope of this instantiation. We don't use // PushDeclContext because we don't have a scope. @@ -5791,14 +6111,16 @@ void Sema::InstantiateVariableDefinition(SourceLocation PointOfInstantiation, // queue of pending implicit instantiations that we will instantiate later, // while we're still within our own instantiation context. GlobalEagerInstantiationScope GlobalInstantiations(*this, - /*Enabled=*/Recursive); + /*Enabled=*/Recursive, + /*AtEndOfTU=*/AtEndOfTU); // Enter the scope of this instantiation. We don't use // PushDeclContext because we don't have a scope. 
ContextRAII PreviousContext(*this, Var->getDeclContext()); LocalInstantiationScope Local(*this); - LocalEagerInstantiationScope LocalInstantiations(*this); + LocalEagerInstantiationScope LocalInstantiations(*this, + /*AtEndOfTU=*/AtEndOfTU); VarDecl *OldVar = Var; if (Def->isStaticDataMember() && !Def->isOutOfLine()) { @@ -6546,18 +6868,20 @@ NamedDecl *Sema::FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D, return D; } -void Sema::PerformPendingInstantiations(bool LocalOnly) { - std::deque delayedPCHInstantiations; +void Sema::PerformPendingInstantiations(bool LocalOnly, bool AtEndOfTU) { + std::deque DelayedImplicitInstantiations; while (!PendingLocalImplicitInstantiations.empty() || (!LocalOnly && !PendingInstantiations.empty())) { PendingImplicitInstantiation Inst; + bool LocalInstantiation = false; if (PendingLocalImplicitInstantiations.empty()) { Inst = PendingInstantiations.front(); PendingInstantiations.pop_front(); } else { Inst = PendingLocalImplicitInstantiations.front(); PendingLocalImplicitInstantiations.pop_front(); + LocalInstantiation = true; } // Instantiate function definitions @@ -6566,22 +6890,26 @@ void Sema::PerformPendingInstantiations(bool LocalOnly) { TSK_ExplicitInstantiationDefinition; if (Function->isMultiVersion()) { getASTContext().forEachMultiversionedFunctionVersion( - Function, [this, Inst, DefinitionRequired](FunctionDecl *CurFD) { + Function, + [this, Inst, DefinitionRequired, AtEndOfTU](FunctionDecl *CurFD) { InstantiateFunctionDefinition(/*FIXME:*/ Inst.second, CurFD, true, - DefinitionRequired, true); + DefinitionRequired, AtEndOfTU); if (CurFD->isDefined()) CurFD->setInstantiationIsPending(false); }); } else { InstantiateFunctionDefinition(/*FIXME:*/ Inst.second, Function, true, - DefinitionRequired, true); + DefinitionRequired, AtEndOfTU); if (Function->isDefined()) Function->setInstantiationIsPending(false); } // Definition of a PCH-ed template declaration may be available only in the TU. 
if (!LocalOnly && LangOpts.PCHInstantiateTemplates && TUKind == TU_Prefix && Function->instantiationIsPending()) - delayedPCHInstantiations.push_back(Inst); + DelayedImplicitInstantiations.push_back(Inst); + else if (!AtEndOfTU && Function->instantiationIsPending() && + !LocalInstantiation) + DelayedImplicitInstantiations.push_back(Inst); continue; } @@ -6625,11 +6953,11 @@ void Sema::PerformPendingInstantiations(bool LocalOnly) { // Instantiate static data member definitions or variable template // specializations. InstantiateVariableDefinition(/*FIXME:*/ Inst.second, Var, true, - DefinitionRequired, true); + DefinitionRequired, AtEndOfTU); } - if (!LocalOnly && LangOpts.PCHInstantiateTemplates) - PendingInstantiations.swap(delayedPCHInstantiations); + if (!DelayedImplicitInstantiations.empty()) + PendingInstantiations.swap(DelayedImplicitInstantiations); } void Sema::PerformDependentDiagnostics(const DeclContext *Pattern, diff --git a/clang/lib/Sema/SemaTemplateVariadic.cpp b/clang/lib/Sema/SemaTemplateVariadic.cpp index fad00f7648848..d9256dbd07d7a 100644 --- a/clang/lib/Sema/SemaTemplateVariadic.cpp +++ b/clang/lib/Sema/SemaTemplateVariadic.cpp @@ -834,7 +834,7 @@ bool Sema::CheckParameterPacksForExpansion( if (TA.getKind() == TemplateArgument::Type) return !TA.getAsType() - ->getAs() + ->castAs() ->getNumExpansions(); if (TA.getKind() == TemplateArgument::Expression) diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 60096eebfdb6f..11943c0b53591 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -143,7 +143,8 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr, case ParsedAttr::AT_PreserveAll: \ case ParsedAttr::AT_M68kRTD: \ case ParsedAttr::AT_PreserveNone: \ - case ParsedAttr::AT_RISCVVectorCC + case ParsedAttr::AT_RISCVVectorCC: \ + case ParsedAttr::AT_RISCVVLSCC // Function type attributes. 
#define FUNCTION_TYPE_ATTRS_CASELIST \ @@ -7629,6 +7630,20 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) { return createSimpleAttr(Ctx, Attr); case ParsedAttr::AT_RISCVVectorCC: return createSimpleAttr(Ctx, Attr); + case ParsedAttr::AT_RISCVVLSCC: { + // If the riscv_abi_vlen doesn't have any argument, we set set it to default + // value 128. + unsigned ABIVLen = 128; + if (Attr.getNumArgs()) { + std::optional MaybeABIVLen = + Attr.getArgAsExpr(0)->getIntegerConstantExpr(Ctx); + if (!MaybeABIVLen) + llvm_unreachable("Invalid RISC-V ABI VLEN"); + ABIVLen = MaybeABIVLen->getZExtValue(); + } + + return ::new (Ctx) RISCVVLSCCAttr(Ctx, Attr, ABIVLen); + } } llvm_unreachable("unexpected attribute kind!"); } diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index eaabfae2409f4..9591fd4cfcc1c 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -4093,7 +4093,7 @@ class TreeTransform { StmtResult StrBlock) { return getSema().OpenACC().ActOnEndStmtDirective( K, BeginLoc, DirLoc, SourceLocation{}, SourceLocation{}, {}, - SourceLocation{}, EndLoc, Clauses, StrBlock); + OpenACCAtomicKind::None, SourceLocation{}, EndLoc, Clauses, StrBlock); } StmtResult RebuildOpenACCLoopConstruct(SourceLocation BeginLoc, @@ -4103,7 +4103,8 @@ class TreeTransform { StmtResult Loop) { return getSema().OpenACC().ActOnEndStmtDirective( OpenACCDirectiveKind::Loop, BeginLoc, DirLoc, SourceLocation{}, - SourceLocation{}, {}, SourceLocation{}, EndLoc, Clauses, Loop); + SourceLocation{}, {}, OpenACCAtomicKind::None, SourceLocation{}, EndLoc, + Clauses, Loop); } StmtResult RebuildOpenACCCombinedConstruct(OpenACCDirectiveKind K, @@ -4114,7 +4115,7 @@ class TreeTransform { StmtResult Loop) { return getSema().OpenACC().ActOnEndStmtDirective( K, BeginLoc, DirLoc, SourceLocation{}, SourceLocation{}, {}, - SourceLocation{}, EndLoc, Clauses, Loop); + OpenACCAtomicKind::None, SourceLocation{}, EndLoc, Clauses, Loop); } StmtResult 
RebuildOpenACCDataConstruct(SourceLocation BeginLoc, @@ -4124,7 +4125,8 @@ class TreeTransform { StmtResult StrBlock) { return getSema().OpenACC().ActOnEndStmtDirective( OpenACCDirectiveKind::Data, BeginLoc, DirLoc, SourceLocation{}, - SourceLocation{}, {}, SourceLocation{}, EndLoc, Clauses, StrBlock); + SourceLocation{}, {}, OpenACCAtomicKind::None, SourceLocation{}, EndLoc, + Clauses, StrBlock); } StmtResult @@ -4133,7 +4135,8 @@ class TreeTransform { ArrayRef Clauses) { return getSema().OpenACC().ActOnEndStmtDirective( OpenACCDirectiveKind::EnterData, BeginLoc, DirLoc, SourceLocation{}, - SourceLocation{}, {}, SourceLocation{}, EndLoc, Clauses, {}); + SourceLocation{}, {}, OpenACCAtomicKind::None, SourceLocation{}, EndLoc, + Clauses, {}); } StmtResult @@ -4142,7 +4145,8 @@ class TreeTransform { ArrayRef Clauses) { return getSema().OpenACC().ActOnEndStmtDirective( OpenACCDirectiveKind::ExitData, BeginLoc, DirLoc, SourceLocation{}, - SourceLocation{}, {}, SourceLocation{}, EndLoc, Clauses, {}); + SourceLocation{}, {}, OpenACCAtomicKind::None, SourceLocation{}, EndLoc, + Clauses, {}); } StmtResult RebuildOpenACCHostDataConstruct(SourceLocation BeginLoc, @@ -4152,7 +4156,8 @@ class TreeTransform { StmtResult StrBlock) { return getSema().OpenACC().ActOnEndStmtDirective( OpenACCDirectiveKind::HostData, BeginLoc, DirLoc, SourceLocation{}, - SourceLocation{}, {}, SourceLocation{}, EndLoc, Clauses, StrBlock); + SourceLocation{}, {}, OpenACCAtomicKind::None, SourceLocation{}, EndLoc, + Clauses, StrBlock); } StmtResult RebuildOpenACCInitConstruct(SourceLocation BeginLoc, @@ -4161,7 +4166,8 @@ class TreeTransform { ArrayRef Clauses) { return getSema().OpenACC().ActOnEndStmtDirective( OpenACCDirectiveKind::Init, BeginLoc, DirLoc, SourceLocation{}, - SourceLocation{}, {}, SourceLocation{}, EndLoc, Clauses, {}); + SourceLocation{}, {}, OpenACCAtomicKind::None, SourceLocation{}, EndLoc, + Clauses, {}); } StmtResult @@ -4170,7 +4176,8 @@ class TreeTransform { ArrayRef Clauses) { 
return getSema().OpenACC().ActOnEndStmtDirective( OpenACCDirectiveKind::Shutdown, BeginLoc, DirLoc, SourceLocation{}, - SourceLocation{}, {}, SourceLocation{}, EndLoc, Clauses, {}); + SourceLocation{}, {}, OpenACCAtomicKind::None, SourceLocation{}, EndLoc, + Clauses, {}); } StmtResult RebuildOpenACCSetConstruct(SourceLocation BeginLoc, @@ -4179,7 +4186,8 @@ class TreeTransform { ArrayRef Clauses) { return getSema().OpenACC().ActOnEndStmtDirective( OpenACCDirectiveKind::Set, BeginLoc, DirLoc, SourceLocation{}, - SourceLocation{}, {}, SourceLocation{}, EndLoc, Clauses, {}); + SourceLocation{}, {}, OpenACCAtomicKind::None, SourceLocation{}, EndLoc, + Clauses, {}); } StmtResult RebuildOpenACCUpdateConstruct(SourceLocation BeginLoc, @@ -4188,7 +4196,8 @@ class TreeTransform { ArrayRef Clauses) { return getSema().OpenACC().ActOnEndStmtDirective( OpenACCDirectiveKind::Update, BeginLoc, DirLoc, SourceLocation{}, - SourceLocation{}, {}, SourceLocation{}, EndLoc, Clauses, {}); + SourceLocation{}, {}, OpenACCAtomicKind::None, SourceLocation{}, EndLoc, + Clauses, {}); } StmtResult RebuildOpenACCWaitConstruct( @@ -4201,7 +4210,16 @@ class TreeTransform { Exprs.insert(Exprs.end(), QueueIdExprs.begin(), QueueIdExprs.end()); return getSema().OpenACC().ActOnEndStmtDirective( OpenACCDirectiveKind::Wait, BeginLoc, DirLoc, LParenLoc, QueuesLoc, - Exprs, RParenLoc, EndLoc, Clauses, {}); + Exprs, OpenACCAtomicKind::None, RParenLoc, EndLoc, Clauses, {}); + } + + StmtResult RebuildOpenACCCacheConstruct( + SourceLocation BeginLoc, SourceLocation DirLoc, SourceLocation LParenLoc, + SourceLocation ReadOnlyLoc, ArrayRef VarList, + SourceLocation RParenLoc, SourceLocation EndLoc) { + return getSema().OpenACC().ActOnEndStmtDirective( + OpenACCDirectiveKind::Cache, BeginLoc, DirLoc, LParenLoc, ReadOnlyLoc, + VarList, OpenACCAtomicKind::None, RParenLoc, EndLoc, {}, {}); } StmtResult RebuildOpenACCAtomicConstruct(SourceLocation BeginLoc, @@ -11633,7 +11651,8 @@ class OpenACCClauseTransform final 
if (!Res.isUsable()) continue; - Res = Self.getSema().OpenACC().ActOnVar(ParsedClause.getClauseKind(), + Res = Self.getSema().OpenACC().ActOnVar(ParsedClause.getDirectiveKind(), + ParsedClause.getClauseKind(), Res.get()); if (Res.isUsable()) @@ -11698,7 +11717,8 @@ void OpenACCClauseTransform::VisitSelfClause( if (!Res.isUsable()) continue; - Res = Self.getSema().OpenACC().ActOnVar(ParsedClause.getClauseKind(), + Res = Self.getSema().OpenACC().ActOnVar(ParsedClause.getDirectiveKind(), + ParsedClause.getClauseKind(), Res.get()); if (Res.isUsable()) @@ -11844,6 +11864,23 @@ void OpenACCClauseTransform::VisitCopyClause( ParsedClause.getVarList(), ParsedClause.getEndLoc()); } +template +void OpenACCClauseTransform::VisitLinkClause( + const OpenACCLinkClause &C) { + llvm_unreachable("link clause not valid unless a decl transform"); +} + +template +void OpenACCClauseTransform::VisitDeviceResidentClause( + const OpenACCDeviceResidentClause &C) { + llvm_unreachable("device_resident clause not valid unless a decl transform"); +} +template +void OpenACCClauseTransform::VisitNoHostClause( + const OpenACCNoHostClause &C) { + llvm_unreachable("device_resident clause not valid unless a decl transform"); +} + template void OpenACCClauseTransform::VisitCopyInClause( const OpenACCCopyInClause &C) { @@ -12632,6 +12669,37 @@ TreeTransform::TransformOpenACCWaitConstruct(OpenACCWaitConstruct *C) { DevNumExpr.isUsable() ? 
DevNumExpr.get() : nullptr, C->getQueuesLoc(), QueueIdExprs, C->getRParenLoc(), C->getEndLoc(), TransformedClauses); } +template +StmtResult TreeTransform::TransformOpenACCCacheConstruct( + OpenACCCacheConstruct *C) { + getSema().OpenACC().ActOnConstruct(C->getDirectiveKind(), C->getBeginLoc()); + + llvm::SmallVector TransformedVarList; + for (Expr *Var : C->getVarList()) { + assert(Var && "Null var listexpr?"); + + ExprResult NewVar = getDerived().TransformExpr(Var); + + if (!NewVar.isUsable()) + break; + + NewVar = getSema().OpenACC().ActOnVar( + C->getDirectiveKind(), OpenACCClauseKind::Invalid, NewVar.get()); + if (!NewVar.isUsable()) + break; + + TransformedVarList.push_back(NewVar.get()); + } + + if (getSema().OpenACC().ActOnStartStmtDirective(C->getDirectiveKind(), + C->getBeginLoc(), {})) + return StmtError(); + + return getDerived().RebuildOpenACCCacheConstruct( + C->getBeginLoc(), C->getDirectiveLoc(), C->getLParenLoc(), + C->getReadOnlyLoc(), TransformedVarList, C->getRParenLoc(), + C->getEndLoc()); +} template StmtResult TreeTransform::TransformOpenACCAtomicConstruct( diff --git a/clang/lib/Serialization/ASTCommon.cpp b/clang/lib/Serialization/ASTCommon.cpp index 3a62c4ea5595b..320ee0e65dbea 100644 --- a/clang/lib/Serialization/ASTCommon.cpp +++ b/clang/lib/Serialization/ASTCommon.cpp @@ -458,6 +458,8 @@ bool serialization::isRedeclarableDeclKind(unsigned Kind) { case Decl::RequiresExprBody: case Decl::UnresolvedUsingIfExists: case Decl::HLSLBuffer: + case Decl::OpenACCDeclare: + case Decl::OpenACCRoutine: return false; // These indirectly derive from Redeclarable but are not actually diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 294e8e063e0a3..2ac9754f02eed 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -5329,7 +5329,7 @@ llvm::Error ASTReader::ReadExtensionBlock(ModuleFile &F) { } } - return llvm::Error::success(); + 
llvm_unreachable("ReadExtensionBlock should return from while loop"); } void ASTReader::InitializeContext() { @@ -12515,7 +12515,7 @@ SmallVector ASTRecordReader::readOpenACCVarList() { unsigned NumVars = readInt(); llvm::SmallVector VarList; for (unsigned I = 0; I < NumVars; ++I) - VarList.push_back(readSubExpr()); + VarList.push_back(readExpr()); return VarList; } @@ -12733,6 +12733,8 @@ OpenACCClause *ASTRecordReader::readOpenACCClause() { } case OpenACCClauseKind::Seq: return OpenACCSeqClause::Create(getContext(), BeginLoc, EndLoc); + case OpenACCClauseKind::NoHost: + return OpenACCNoHostClause::Create(getContext(), BeginLoc, EndLoc); case OpenACCClauseKind::Finalize: return OpenACCFinalizeClause::Create(getContext(), BeginLoc, EndLoc); case OpenACCClauseKind::IfPresent: @@ -12764,7 +12766,8 @@ OpenACCClause *ASTRecordReader::readOpenACCClause() { llvm::SmallVector Exprs; for (unsigned I = 0; I < NumExprs; ++I) { GangKinds.push_back(readEnum()); - Exprs.push_back(readSubExpr()); + // Can't use `readSubExpr` because this is usable from a 'decl' construct. 
+ Exprs.push_back(readExpr()); } return OpenACCGangClause::Create(getContext(), BeginLoc, LParenLoc, GangKinds, Exprs, EndLoc); @@ -12781,10 +12784,19 @@ OpenACCClause *ASTRecordReader::readOpenACCClause() { return OpenACCVectorClause::Create(getContext(), BeginLoc, LParenLoc, VectorExpr, EndLoc); } + case OpenACCClauseKind::Link: { + SourceLocation LParenLoc = readSourceLocation(); + llvm::SmallVector VarList = readOpenACCVarList(); + return OpenACCLinkClause::Create(getContext(), BeginLoc, LParenLoc, VarList, + EndLoc); + } + case OpenACCClauseKind::DeviceResident: { + SourceLocation LParenLoc = readSourceLocation(); + llvm::SmallVector VarList = readOpenACCVarList(); + return OpenACCDeviceResidentClause::Create(getContext(), BeginLoc, + LParenLoc, VarList, EndLoc); + } - case OpenACCClauseKind::NoHost: - case OpenACCClauseKind::DeviceResident: - case OpenACCClauseKind::Link: case OpenACCClauseKind::Bind: case OpenACCClauseKind::Invalid: llvm_unreachable("Clause serialization not yet implemented"); diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 17a41fff2267c..262cb0f63b462 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -414,6 +414,9 @@ class ASTDeclReader : public DeclVisitor { void VisitEmptyDecl(EmptyDecl *D); void VisitLifetimeExtendedTemporaryDecl(LifetimeExtendedTemporaryDecl *D); + void VisitOpenACCDeclareDecl(OpenACCDeclareDecl *D); + void VisitOpenACCRoutineDecl(OpenACCRoutineDecl *D); + void VisitDeclContext(DeclContext *DC, uint64_t &LexicalOffset, uint64_t &VisibleOffset, uint64_t &ModuleLocalOffset, uint64_t &TULocalOffset); @@ -3099,6 +3102,23 @@ void ASTDeclReader::VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D) { VisitVarDecl(D); } +void ASTDeclReader::VisitOpenACCDeclareDecl(OpenACCDeclareDecl *D) { + VisitDecl(D); + D->DirKind = Record.readEnum(); + D->DirectiveLoc = Record.readSourceLocation(); + D->EndLoc = 
Record.readSourceLocation(); + Record.readOpenACCClauseList(D->Clauses); +} +void ASTDeclReader::VisitOpenACCRoutineDecl(OpenACCRoutineDecl *D) { + VisitDecl(D); + D->DirKind = Record.readEnum(); + D->DirectiveLoc = Record.readSourceLocation(); + D->EndLoc = Record.readSourceLocation(); + D->ParensLoc = Record.readSourceRange(); + D->FuncRef = Record.readExpr(); + Record.readOpenACCClauseList(D->Clauses); +} + //===----------------------------------------------------------------------===// // Attribute Reading //===----------------------------------------------------------------------===// @@ -4204,6 +4224,12 @@ Decl *ASTReader::ReadDeclRecord(GlobalDeclID ID) { D = ImplicitConceptSpecializationDecl::CreateDeserialized(Context, ID, Record.readInt()); break; + case DECL_OPENACC_DECLARE: + D = OpenACCDeclareDecl::CreateDeserialized(Context, ID, Record.readInt()); + break; + case DECL_OPENACC_ROUTINE: + D = OpenACCRoutineDecl::CreateDeserialized(Context, ID, Record.readInt()); + break; } assert(D && "Unknown declaration reading AST file"); diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 835ad4a658944..48f9f89bd6e4c 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2922,6 +2922,16 @@ void ASTStmtReader::VisitOpenACCWaitConstruct(OpenACCWaitConstruct *S) { } } +void ASTStmtReader::VisitOpenACCCacheConstruct(OpenACCCacheConstruct *S) { + VisitStmt(S); + (void)Record.readInt(); + VisitOpenACCConstructStmt(S); + S->ParensLoc = Record.readSourceRange(); + S->ReadOnlyLoc = Record.readSourceLocation(); + for (unsigned I = 0; I < S->NumVars; ++I) + S->getVarListPtr()[I] = cast(Record.readSubStmt()); +} + void ASTStmtReader::VisitOpenACCAtomicConstruct(OpenACCAtomicConstruct *S) { VisitStmt(S); S->Kind = Record.readEnum(); @@ -4447,6 +4457,11 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { S = OpenACCWaitConstruct::CreateEmpty(Context, NumExprs, NumClauses); 
break; } + case STMT_OPENACC_CACHE_CONSTRUCT: { + unsigned NumVars = Record[ASTStmtReader::NumStmtFields]; + S = OpenACCCacheConstruct::CreateEmpty(Context, NumVars); + break; + } case STMT_OPENACC_INIT_CONSTRUCT: { unsigned NumClauses = Record[ASTStmtReader::NumStmtFields]; S = OpenACCInitConstruct::CreateEmpty(Context, NumClauses); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index a01e72f193cf1..0aa115ecadf8e 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1135,6 +1135,8 @@ void ASTWriter::WriteBlockInfoBlock() { RECORD(DECL_OMP_DECLARE_REDUCTION); RECORD(DECL_OMP_ALLOCATE); RECORD(DECL_HLSL_BUFFER); + RECORD(DECL_OPENACC_DECLARE); + RECORD(DECL_OPENACC_ROUTINE); // Statements and Exprs can occur in the Decls and Types block. AddStmtsExprs(Stream, Record); @@ -8781,6 +8783,7 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) { } case OpenACCClauseKind::Seq: case OpenACCClauseKind::Independent: + case OpenACCClauseKind::NoHost: case OpenACCClauseKind::Auto: case OpenACCClauseKind::Finalize: case OpenACCClauseKind::IfPresent: @@ -8828,10 +8831,19 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) { AddStmt(const_cast(VC->getIntExpr())); return; } + case OpenACCClauseKind::Link: { + const auto *LC = cast(C); + writeSourceLocation(LC->getLParenLoc()); + writeOpenACCVarList(LC); + return; + } + case OpenACCClauseKind::DeviceResident: { + const auto *DRC = cast(C); + writeSourceLocation(DRC->getLParenLoc()); + writeOpenACCVarList(DRC); + return; + } - case OpenACCClauseKind::NoHost: - case OpenACCClauseKind::DeviceResident: - case OpenACCClauseKind::Link: case OpenACCClauseKind::Bind: case OpenACCClauseKind::Invalid: llvm_unreachable("Clause serialization not yet implemented"); diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index ac80bb46afa2d..d4551348b168b 100644 --- 
a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -176,6 +176,9 @@ namespace clang { void VisitOMPDeclareMapperDecl(OMPDeclareMapperDecl *D); void VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D); + void VisitOpenACCDeclareDecl(OpenACCDeclareDecl *D); + void VisitOpenACCRoutineDecl(OpenACCRoutineDecl *D); + /// Add an Objective-C type parameter list to the given record. void AddObjCTypeParamList(ObjCTypeParamList *typeParams) { // Empty type parameter list. @@ -328,6 +331,12 @@ namespace clang { } bool clang::CanElideDeclDef(const Decl *D) { + bool isExternalWithNoLinkageType = false; + if (auto *VD = dyn_cast(D)) + if (VD->hasExternalFormalLinkage() && + !isExternalFormalLinkage(VD->getType()->getLinkage())) + isExternalWithNoLinkageType = true; + if (auto *FD = dyn_cast(D)) { if (FD->isInlined() || FD->isConstexpr()) return false; @@ -337,6 +346,9 @@ bool clang::CanElideDeclDef(const Decl *D) { if (FD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) return false; + + if (isExternalWithNoLinkageType && !FD->isExternC()) + return false; } if (auto *VD = dyn_cast(D)) { @@ -350,6 +362,9 @@ bool clang::CanElideDeclDef(const Decl *D) { if (VD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) return false; + + if (isExternalWithNoLinkageType && !VD->isExternC()) + return false; } return true; @@ -2258,6 +2273,27 @@ void ASTDeclWriter::VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D) { Code = serialization::DECL_OMP_CAPTUREDEXPR; } +void ASTDeclWriter::VisitOpenACCDeclareDecl(OpenACCDeclareDecl *D) { + Record.writeUInt32(D->clauses().size()); + VisitDecl(D); + Record.writeEnum(D->DirKind); + Record.AddSourceLocation(D->DirectiveLoc); + Record.AddSourceLocation(D->EndLoc); + Record.writeOpenACCClauseList(D->clauses()); + Code = serialization::DECL_OPENACC_DECLARE; +} +void ASTDeclWriter::VisitOpenACCRoutineDecl(OpenACCRoutineDecl *D) { + Record.writeUInt32(D->clauses().size()); + VisitDecl(D); + 
Record.writeEnum(D->DirKind); + Record.AddSourceLocation(D->DirectiveLoc); + Record.AddSourceLocation(D->EndLoc); + Record.AddSourceRange(D->ParensLoc); + Record.AddStmt(D->FuncRef); + Record.writeOpenACCClauseList(D->clauses()); + Code = serialization::DECL_OPENACC_ROUTINE; +} + //===----------------------------------------------------------------------===// // ASTWriter Implementation //===----------------------------------------------------------------------===// diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index 82738d3a8c88a..aa5a7854394a0 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -3017,6 +3017,18 @@ void ASTStmtWriter::VisitOpenACCAtomicConstruct(OpenACCAtomicConstruct *S) { Code = serialization::STMT_OPENACC_ATOMIC_CONSTRUCT; } +void ASTStmtWriter::VisitOpenACCCacheConstruct(OpenACCCacheConstruct *S) { + VisitStmt(S); + Record.push_back(S->getVarList().size()); + VisitOpenACCConstructStmt(S); + Record.AddSourceRange(S->ParensLoc); + Record.AddSourceLocation(S->ReadOnlyLoc); + + for (Expr *E : S->getVarList()) + Record.AddStmt(E); + Code = serialization::STMT_OPENACC_CACHE_CONSTRUCT; +} + //===----------------------------------------------------------------------===// // HLSL Constructs/Directives. 
//===----------------------------------------------------------------------===// diff --git a/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp index e9e2771c739b6..defa0701cb51f 100644 --- a/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp @@ -31,7 +31,12 @@ class DereferenceChecker : public Checker< check::Location, check::Bind, EventDispatcher > { - enum DerefKind { NullPointer, UndefinedPointerValue, AddressOfLabel }; + enum DerefKind { + NullPointer, + UndefinedPointerValue, + AddressOfLabel, + FixedAddress, + }; void reportBug(DerefKind K, ProgramStateRef State, const Stmt *S, CheckerContext &C) const; @@ -49,13 +54,13 @@ class DereferenceChecker const LocationContext *LCtx, bool loadedFrom = false); - bool SuppressAddressSpaces = false; - bool CheckNullDereference = false; + bool CheckFixedDereference = false; std::unique_ptr BT_Null; std::unique_ptr BT_Undef; std::unique_ptr BT_Label; + std::unique_ptr BT_FixedAddress; }; } // end anonymous namespace @@ -130,7 +135,9 @@ bool DereferenceChecker::suppressReport(CheckerContext &C, QualType Ty = E->getType(); if (!Ty.hasAddressSpace()) return false; - if (SuppressAddressSpaces) + if (C.getAnalysisManager() + .getAnalyzerOptions() + .ShouldSuppressAddressSpaceDereferences) return true; const llvm::Triple::ArchType Arch = @@ -155,30 +162,47 @@ static bool isDeclRefExprToReference(const Expr *E) { void DereferenceChecker::reportBug(DerefKind K, ProgramStateRef State, const Stmt *S, CheckerContext &C) const { - if (!CheckNullDereference) { - C.addSink(); - return; - } - const BugType *BT = nullptr; llvm::StringRef DerefStr1; llvm::StringRef DerefStr2; switch (K) { case DerefKind::NullPointer: + if (!CheckNullDereference) { + C.addSink(); + return; + } BT = BT_Null.get(); DerefStr1 = " results in a null pointer dereference"; DerefStr2 = " results in a dereference of a null 
pointer"; break; case DerefKind::UndefinedPointerValue: + if (!CheckNullDereference) { + C.addSink(); + return; + } BT = BT_Undef.get(); DerefStr1 = " results in an undefined pointer dereference"; DerefStr2 = " results in a dereference of an undefined pointer value"; break; case DerefKind::AddressOfLabel: + if (!CheckNullDereference) { + C.addSink(); + return; + } BT = BT_Label.get(); DerefStr1 = " results in an undefined pointer dereference"; DerefStr2 = " results in a dereference of an address of a label"; break; + case DerefKind::FixedAddress: + // Deliberately don't add a sink node if check is disabled. + // This situation may be valid in special cases. + if (!CheckFixedDereference) + return; + + BT = BT_FixedAddress.get(); + DerefStr1 = " results in a dereference of a fixed address"; + DerefStr2 = " results in a dereference of a fixed address"; + break; }; // Generate an error node. @@ -289,6 +313,13 @@ void DereferenceChecker::checkLocation(SVal l, bool isLoad, const Stmt* S, } } + if (location.isConstant()) { + const Expr *DerefExpr = getDereferenceExpr(S, isLoad); + if (!suppressReport(C, DerefExpr)) + reportBug(DerefKind::FixedAddress, notNullState, DerefExpr, C); + return; + } + // From this point forward, we know that the location is not null. C.addTransition(notNullState); } @@ -337,6 +368,13 @@ void DereferenceChecker::checkBind(SVal L, SVal V, const Stmt *S, } } + if (V.isConstant()) { + const Expr *DerefExpr = getDereferenceExpr(S, true); + if (!suppressReport(C, DerefExpr)) + reportBug(DerefKind::FixedAddress, State, DerefExpr, C); + return; + } + // Unlike a regular null dereference, initializing a reference with a // dereferenced null pointer does not actually cause a runtime exception in // Clang's implementation of references. 
@@ -367,8 +405,6 @@ bool ento::shouldRegisterDereferenceModeling(const CheckerManager &) { void ento::registerNullDereferenceChecker(CheckerManager &Mgr) { auto *Chk = Mgr.getChecker(); Chk->CheckNullDereference = true; - Chk->SuppressAddressSpaces = Mgr.getAnalyzerOptions().getCheckerBooleanOption( - Mgr.getCurrentCheckerName(), "SuppressAddressSpaces"); Chk->BT_Null.reset(new BugType(Mgr.getCurrentCheckerName(), "Dereference of null pointer", categories::LogicError)); @@ -383,3 +419,16 @@ void ento::registerNullDereferenceChecker(CheckerManager &Mgr) { bool ento::shouldRegisterNullDereferenceChecker(const CheckerManager &) { return true; } + +void ento::registerFixedAddressDereferenceChecker(CheckerManager &Mgr) { + auto *Chk = Mgr.getChecker(); + Chk->CheckFixedDereference = true; + Chk->BT_FixedAddress.reset(new BugType(Mgr.getCurrentCheckerName(), + "Dereference of a fixed address", + categories::LogicError)); +} + +bool ento::shouldRegisterFixedAddressDereferenceChecker( + const CheckerManager &) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp index dc86c4fcc64b1..58020ec4e084d 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp @@ -140,9 +140,14 @@ bool tryToFindPtrOrigin( bool isASafeCallArg(const Expr *E) { assert(E); if (auto *Ref = dyn_cast(E)) { - if (auto *D = dyn_cast_or_null(Ref->getFoundDecl())) { + auto *FoundDecl = Ref->getFoundDecl(); + if (auto *D = dyn_cast_or_null(FoundDecl)) { if (isa(D) || D->isLocalVarDecl()) return true; + } else if (auto *BD = dyn_cast_or_null(FoundDecl)) { + VarDecl *VD = BD->getHoldingVar(); + if (VD && (isa(VD) || VD->isLocalVarDecl())) + return true; } } if (isConstOwnerPtrMemberExpr(E)) diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 318fa3c1caf06..914eb0f4ef6bd 100644 --- 
a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1834,6 +1834,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OpenACCExitDataConstructClass: case Stmt::OpenACCHostDataConstructClass: case Stmt::OpenACCWaitConstructClass: + case Stmt::OpenACCCacheConstructClass: case Stmt::OpenACCInitConstructClass: case Stmt::OpenACCShutdownConstructClass: case Stmt::OpenACCSetConstructClass: @@ -1950,7 +1951,6 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, // to be explicitly evaluated. case Stmt::PredefinedExprClass: case Stmt::AddrLabelExprClass: - case Stmt::AttributedStmtClass: case Stmt::IntegerLiteralClass: case Stmt::FixedPointLiteralClass: case Stmt::CharacterLiteralClass: @@ -1981,6 +1981,13 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, break; } + case Stmt::AttributedStmtClass: { + Bldr.takeNodes(Pred); + VisitAttributedStmt(cast(S), Pred, Dst); + Bldr.addNodes(Dst); + break; + } + case Stmt::CXXDefaultArgExprClass: case Stmt::CXXDefaultInitExprClass: { Bldr.takeNodes(Pred); diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp index 1061dafbb2473..3d0a69a515ab8 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp @@ -797,8 +797,8 @@ void ExprEngine::VisitGuardedExpr(const Expr *Ex, // Find the predecessor block. ProgramStateRef SrcState = state; for (const ExplodedNode *N = Pred ; N ; N = *N->pred_begin()) { - ProgramPoint PP = N->getLocation(); - if (PP.getAs() || PP.getAs()) { + auto Edge = N->getLocationAs(); + if (!Edge.has_value()) { // If the state N has multiple predecessors P, it means that successors // of P are all equivalent. // In turn, that means that all nodes at P are equivalent in terms @@ -806,7 +806,7 @@ void ExprEngine::VisitGuardedExpr(const Expr *Ex, // FIXME: a more robust solution which does not walk up the tree. 
continue; } - SrcBlock = PP.castAs().getSrc(); + SrcBlock = Edge->getSrc(); SrcState = N->getState(); break; } diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp index f7020da2e6da2..7e878f922a939 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "clang/AST/AttrIterator.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/ParentMap.h" #include "clang/AST/StmtCXX.h" @@ -69,6 +70,7 @@ void ExprEngine::performTrivialCopy(NodeBuilder &Bldr, ExplodedNode *Pred, assert(ThisRD); SVal V = Call.getArgSVal(0); + const Expr *VExpr = Call.getArgExpr(0); // If the value being copied is not unknown, load from its location to get // an aggregate rvalue. @@ -76,7 +78,12 @@ void ExprEngine::performTrivialCopy(NodeBuilder &Bldr, ExplodedNode *Pred, V = Pred->getState()->getSVal(*L); else assert(V.isUnknownOrUndef()); - evalBind(Dst, CallExpr, Pred, ThisVal, V, true); + + ExplodedNodeSet Tmp; + evalLocation(Tmp, CallExpr, VExpr, Pred, Pred->getState(), V, + /*isLoad=*/true); + for (ExplodedNode *N : Tmp) + evalBind(Dst, CallExpr, N, ThisVal, V, true); PostStmt PS(CallExpr, LCtx); for (ExplodedNode *N : Dst) { @@ -1200,3 +1207,20 @@ void ExprEngine::VisitLambdaExpr(const LambdaExpr *LE, ExplodedNode *Pred, // FIXME: Move all post/pre visits to ::Visit(). 
getCheckerManager().runCheckersForPostStmt(Dst, Tmp, LE, *this); } + +void ExprEngine::VisitAttributedStmt(const AttributedStmt *A, + ExplodedNode *Pred, ExplodedNodeSet &Dst) { + ExplodedNodeSet CheckerPreStmt; + getCheckerManager().runCheckersForPreStmt(CheckerPreStmt, Pred, A, *this); + + ExplodedNodeSet EvalSet; + StmtNodeBuilder Bldr(CheckerPreStmt, EvalSet, *currBldrCtx); + + for (const auto *Attr : getSpecificAttrs(A->getAttrs())) { + for (ExplodedNode *N : CheckerPreStmt) { + Visit(Attr->getAssumption(), N, EvalSet); + } + } + + getCheckerManager().runCheckersForPostStmt(Dst, EvalSet, A, *this); +} diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index 620fc117c6789..79cb5a07701fd 100644 --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -2533,6 +2533,15 @@ RegionStoreManager::bind(LimitedRegionBindingsConstRef B, Loc L, SVal V) { const MemRegion *R = MemRegVal->getRegion(); + // Binding directly to a symbolic region should be treated as binding + // to element 0. + if (const auto *SymReg = dyn_cast(R)) { + QualType Ty = SymReg->getPointeeStaticType(); + if (Ty->isVoidType()) + Ty = StateMgr.getContext().CharTy; + R = GetElementZeroRegion(SymReg, Ty); + } + // Check if the region is a struct region. if (const TypedValueRegion* TR = dyn_cast(R)) { QualType Ty = TR->getValueType(); @@ -2546,15 +2555,6 @@ RegionStoreManager::bind(LimitedRegionBindingsConstRef B, Loc L, SVal V) { return bindAggregate(B, TR, V); } - // Binding directly to a symbolic region should be treated as binding - // to element 0. 
- if (const auto *SymReg = dyn_cast(R)) { - QualType Ty = SymReg->getPointeeStaticType(); - if (Ty->isVoidType()) - Ty = StateMgr.getContext().CharTy; - R = GetElementZeroRegion(SymReg, Ty); - } - assert((!isa(R) || !B.lookup(R)) && "'this' pointer is not an l-value and is not assignable"); @@ -2570,6 +2570,9 @@ RegionStoreManager::bind(LimitedRegionBindingsConstRef B, Loc L, SVal V) { LimitedRegionBindingsRef RegionStoreManager::setImplicitDefaultValue(LimitedRegionBindingsConstRef B, const MemRegion *R, QualType T) { + if (B.hasExhaustedBindingLimit()) + return B; + SVal V; if (Loc::isLocType(T)) @@ -2596,6 +2599,8 @@ RegionStoreManager::setImplicitDefaultValue(LimitedRegionBindingsConstRef B, std::optional RegionStoreManager::tryBindSmallArray( LimitedRegionBindingsConstRef B, const TypedValueRegion *R, const ArrayType *AT, nonloc::LazyCompoundVal LCV) { + if (B.hasExhaustedBindingLimit()) + return B.withValuesEscaped(LCV); auto CAT = dyn_cast(AT); @@ -2632,6 +2637,8 @@ RegionStoreManager::bindArray(LimitedRegionBindingsConstRef B, const TypedValueRegion *R, SVal Init) { llvm::TimeTraceScope TimeScope("RegionStoreManager::bindArray", [R]() { return R->getDescriptiveName(); }); + if (B.hasExhaustedBindingLimit()) + return B.withValuesEscaped(Init); const ArrayType *AT =cast(Ctx.getCanonicalType(R->getValueType())); QualType ElementTy = AT->getElementType(); @@ -2698,6 +2705,9 @@ RegionStoreManager::bindVector(LimitedRegionBindingsConstRef B, const TypedValueRegion *R, SVal V) { llvm::TimeTraceScope TimeScope("RegionStoreManager::bindVector", [R]() { return R->getDescriptiveName(); }); + if (B.hasExhaustedBindingLimit()) + return B.withValuesEscaped(V); + QualType T = R->getValueType(); const VectorType *VT = T->castAs(); // Use castAs for typedefs. 
@@ -2722,6 +2732,9 @@ RegionStoreManager::bindVector(LimitedRegionBindingsConstRef B, if (VI == VE) break; + if (NewB.hasExhaustedBindingLimit()) + return NewB.withValuesEscaped(VI, VE); + NonLoc Idx = svalBuilder.makeArrayIndex(index); const ElementRegion *ER = MRMgr.getElementRegion(ElemType, Idx, R, Ctx); @@ -2758,6 +2771,9 @@ RegionStoreManager::getUniqueDefaultBinding(nonloc::LazyCompoundVal LCV) const { std::optional RegionStoreManager::tryBindSmallStruct( LimitedRegionBindingsConstRef B, const TypedValueRegion *R, const RecordDecl *RD, nonloc::LazyCompoundVal LCV) { + if (B.hasExhaustedBindingLimit()) + return B.withValuesEscaped(LCV); + // If we try to copy a Conjured value representing the value of the whole // struct, don't try to element-wise copy each field. // That would unnecessarily bind Derived symbols slicing off the subregion for @@ -2822,6 +2838,9 @@ RegionStoreManager::bindStruct(LimitedRegionBindingsConstRef B, const TypedValueRegion *R, SVal V) { llvm::TimeTraceScope TimeScope("RegionStoreManager::bindStruct", [R]() { return R->getDescriptiveName(); }); + if (B.hasExhaustedBindingLimit()) + return B.withValuesEscaped(V); + QualType T = R->getValueType(); assert(T->isStructureOrClassType()); @@ -2931,6 +2950,9 @@ RegionStoreManager::bindStruct(LimitedRegionBindingsConstRef B, ++VI; } + if (NewB.hasExhaustedBindingLimit()) + return NewB.withValuesEscaped(VI, VE); + // There may be fewer values in the initialize list than the fields of struct. if (FI != FE) { NewB = NewB.addBinding(R, BindingKey::Default, @@ -2945,6 +2967,9 @@ RegionStoreManager::bindAggregate(LimitedRegionBindingsConstRef B, const TypedRegion *R, SVal Val) { llvm::TimeTraceScope TimeScope("RegionStoreManager::bindAggregate", [R]() { return R->getDescriptiveName(); }); + if (B.hasExhaustedBindingLimit()) + return B.withValuesEscaped(Val); + // Remove the old bindings, using 'R' as the root of all regions // we will invalidate. Then add the new binding. 
return removeSubRegionBindings(B, R).addBinding(R, BindingKey::Default, Val); diff --git a/clang/lib/Testing/CMakeLists.txt b/clang/lib/Testing/CMakeLists.txt index 4a2400d6a107a..f4c99413d6dbf 100644 --- a/clang/lib/Testing/CMakeLists.txt +++ b/clang/lib/Testing/CMakeLists.txt @@ -13,6 +13,7 @@ add_llvm_library(clangTesting LINK_COMPONENTS MC Support + TargetParser ) clang_target_link_libraries(clangTesting diff --git a/clang/lib/Tooling/DependencyScanning/CMakeLists.txt b/clang/lib/Tooling/DependencyScanning/CMakeLists.txt index 66795b0be0baa..6b500a183bcfc 100644 --- a/clang/lib/Tooling/DependencyScanning/CMakeLists.txt +++ b/clang/lib/Tooling/DependencyScanning/CMakeLists.txt @@ -1,5 +1,4 @@ set(LLVM_LINK_COMPONENTS - ${LLVM_TARGETS_TO_BUILD} Core Option Support diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp index 4fb5977580497..96fe40c079c65 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "clang/Tooling/DependencyScanning/DependencyScanningService.h" -#include "llvm/Support/TargetSelect.h" using namespace clang; using namespace tooling; @@ -17,10 +16,4 @@ DependencyScanningService::DependencyScanningService( ScanningMode Mode, ScanningOutputFormat Format, ScanningOptimizations OptimizeArgs, bool EagerLoadModules, bool TraceVFS) : Mode(Mode), Format(Format), OptimizeArgs(OptimizeArgs), - EagerLoadModules(EagerLoadModules), TraceVFS(TraceVFS) { - // Initialize targets for object file support. 
- llvm::InitializeAllTargets(); - llvm::InitializeAllTargetMCs(); - llvm::InitializeAllAsmPrinters(); - llvm::InitializeAllAsmParsers(); -} + EagerLoadModules(EagerLoadModules), TraceVFS(TraceVFS) {} diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp index d51b039d40043..75380f99901a2 100644 --- a/clang/test/AST/ByteCode/builtin-functions.cpp +++ b/clang/test/AST/ByteCode/builtin-functions.cpp @@ -1009,8 +1009,7 @@ namespace shufflevector { namespace FunctionStart { void a(void) {} static_assert(__builtin_function_start(a) == a, ""); // both-error {{not an integral constant expression}} \ - // ref-note {{comparison against opaque constant address '&__builtin_function_start(a)'}} \ - // expected-note {{comparison of addresses of potentially overlapping literals has unspecified value}} + // both-note {{comparison against opaque constant address '&__builtin_function_start(a)'}} } namespace BuiltinInImplicitCtor { diff --git a/clang/test/AST/ByteCode/cxx1z.cpp b/clang/test/AST/ByteCode/cxx1z.cpp index 57f99235a2b20..ca5f10f6567b4 100644 --- a/clang/test/AST/ByteCode/cxx1z.cpp +++ b/clang/test/AST/ByteCode/cxx1z.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fexperimental-new-constant-interpreter -std=c++17 -verify=expected,both %s // RUN: %clang_cc1 -std=c++17 -verify=ref,both %s -template struct A {}; +template struct A {}; // both-note 6{{template parameter is declared here}} namespace Temp { struct S { int n; }; constexpr S &addr(S &&s) { return s; } diff --git a/clang/test/AST/ByteCode/cxx20.cpp b/clang/test/AST/ByteCode/cxx20.cpp index 06501de64916a..d39c2281ec146 100644 --- a/clang/test/AST/ByteCode/cxx20.cpp +++ b/clang/test/AST/ByteCode/cxx20.cpp @@ -897,7 +897,7 @@ namespace VirtDtor { } namespace TemporaryInNTTP { - template struct B { /* ... */ }; + template struct B { /* ... 
*/ }; // both-note {{template parameter is declared here}} struct J1 { J1 *self=this; }; diff --git a/clang/test/AST/ByteCode/cxx98.cpp b/clang/test/AST/ByteCode/cxx98.cpp index c17049b01c1da..9af668029f8a3 100644 --- a/clang/test/AST/ByteCode/cxx98.cpp +++ b/clang/test/AST/ByteCode/cxx98.cpp @@ -6,7 +6,7 @@ namespace IntOrEnum { const int k = 0; const int &p = k; // both-note {{declared here}} - template struct S {}; + template struct S {}; // both-note {{template parameter is declared here}} S

s; // both-error {{not an integral constant expression}} \ // both-note {{read of variable 'p' of non-integral, non-enumeration type 'const int &'}} } diff --git a/clang/test/AST/ByteCode/functions.cpp b/clang/test/AST/ByteCode/functions.cpp index 66693a1fd7e32..a767d104b3c8a 100644 --- a/clang/test/AST/ByteCode/functions.cpp +++ b/clang/test/AST/ByteCode/functions.cpp @@ -484,6 +484,18 @@ namespace AddressOf { void testAddressof(int x) { static_assert(&x == __builtin_addressof(x), ""); } + + struct TS { + constexpr bool f(TS s) const { + /// The addressof call has a CXXConstructExpr as a parameter. + return this != __builtin_addressof(s); + } + }; + constexpr bool exprAddressOf() { + TS s; + return s.f(s); + } + static_assert(exprAddressOf(), ""); } namespace std { @@ -669,3 +681,16 @@ namespace StableAddress { static_assert(sum() == 1234, ""); } #endif + +namespace NoDiags { + void huh(); + template + constexpr void hd_fun() { + huh(); + } + + constexpr bool foo() { + hd_fun<1>(); + return true; + } +} diff --git a/clang/test/AST/ByteCode/libcxx/deref-to-array.cpp b/clang/test/AST/ByteCode/libcxx/deref-to-array.cpp new file mode 100644 index 0000000000000..2a527ab336a0d --- /dev/null +++ b/clang/test/AST/ByteCode/libcxx/deref-to-array.cpp @@ -0,0 +1,391 @@ +// RUN: %clang_cc1 -std=c++2c -fexperimental-new-constant-interpreter -verify=expected,both %s +// RUN: %clang_cc1 -std=c++2c -verify=ref,both %s + +// both-no-diagnostics + +namespace std { +inline namespace { +template struct integral_constant { + static const _Tp value = __v; +}; +template using _BoolConstant = integral_constant; +template using __remove_cv_t = __remove_cv(_Tp); +template using remove_cv_t = __remove_cv_t<_Tp>; +template +constexpr bool is_convertible_v = __is_convertible(_From, _To); +template _Tp __declval(long); +template decltype(__declval<_Tp>(0)) declval(); +template +concept convertible_to = is_convertible_v<_From, _To> && + requires { static_cast<_To>(declval<_From>()); }; +template 
constexpr bool is_reference_v = __is_reference(_Tp); +template +constexpr bool is_lvalue_reference_v = __is_lvalue_reference(_Tp); +template +constexpr bool is_nothrow_destructible_v = + integral_constant::value; +template +concept destructible = is_nothrow_destructible_v<_Tp>; +template +using _IsSame = _BoolConstant<__is_same(_Tp, _Up)>; +template +constexpr bool is_constructible_v = __is_constructible(_Args...); +template +concept constructible_from = destructible<_Tp> && is_constructible_v<_Tp>; +template +concept move_constructible = + constructible_from<_Tp, _Tp> && convertible_to<_Tp, _Tp>; +template +concept __same_as_impl = _IsSame<_Tp, _Up>::value; +template +concept same_as = __same_as_impl<_Tp, _Up> && __same_as_impl<_Up, _Tp>; +template struct _IfImpl; +template <> struct _IfImpl { + template using _Select = _ElseRes; +}; +template +using _If = _IfImpl<_Cond>::template _Select<_IfRes, _ElseRes>; +template struct conditional { + using type = _If; +}; +template +using conditional_t = conditional<_IfRes>::type; +template +using __libcpp_remove_reference_t = __remove_reference_t(_Tp); +template +using remove_reference_t = __libcpp_remove_reference_t<_Tp>; +template using __decay_t = __decay(_Tp); +template using __remove_cvref_t = __remove_cvref(_Tp); +template using remove_cvref_t = __remove_cvref_t<_Tp>; +struct __copy_cv { + template using __apply = _To; +}; +template using __copy_cv_t = __copy_cv::__apply<_To>; +template +using __cond_res = + decltype(false ? 
std::declval<_Xp (&)()>()() : std::declval<_Yp (&)()>()()); +template , + class = remove_reference_t<_Bp>> +struct __common_ref; +template +struct __common_ref : __common_ref<_Bp, _Ap> {}; +template +using __common_ref_t = __common_ref<_Xp, _Yp>::__type; +template +using __cv_cond_res = + __cond_res<__copy_cv_t<_Xp, _Yp> &, __copy_cv_t<_Yp, _Xp> &>; +template + requires requires { typename __cv_cond_res<_Xp, _Yp>; } && + is_reference_v<__cv_cond_res<_Xp, _Yp>> +struct __common_ref<_Ap, _Bp &, _Xp, _Yp> { + using __type = __cv_cond_res<_Xp, _Yp>; +}; +template +using __common_ref_D = __common_ref_t; +template + requires requires { typename __common_ref_D<_Xp, _Yp>; } && + is_convertible_v<_Ap, __common_ref_D<_Xp, _Yp>> +struct __common_ref<_Ap &&, _Bp &, _Xp, _Yp> { + using __type = __common_ref_D<_Xp, _Yp>; +}; +template struct common_reference; +template +using common_reference_t = common_reference<_Types...>::type; +template struct __common_reference_sub_bullet1; +template +struct common_reference<_Tp, _Up> : __common_reference_sub_bullet1<_Tp, _Up> {}; +template + requires is_reference_v<_Tp> && is_reference_v<_Up> && + requires { typename __common_ref_t<_Tp, _Up>; } +struct __common_reference_sub_bullet1<_Tp, _Up> { + using type = __common_ref_t<_Tp, _Up>; +}; +template +concept common_reference_with = + same_as, common_reference_t<_Up, _Tp>> && + convertible_to<_Tp, common_reference_t<_Tp, _Up>> && + convertible_to<_Up, common_reference_t<_Tp, _Up>>; +template +using __make_const_lvalue_ref = __libcpp_remove_reference_t<_Tp> &; +template +concept assignable_from = + is_lvalue_reference_v<_Lhs> && + common_reference_with<__make_const_lvalue_ref<_Lhs>, + __make_const_lvalue_ref<_Rhs>> && + requires(_Lhs __lhs, _Rhs __rhs) { + { __rhs } -> same_as<_Lhs>; + }; +template constexpr __libcpp_remove_reference_t<_Tp> &&move(_Tp &&); +typedef int type; +template using __enable_if_t = type; +namespace ranges { +inline namespace { +auto swap = int{}; +} +} // namespace 
ranges +template constexpr bool is_object_v = __is_object(_Tp); +template +concept movable = is_object_v<_Tp> && move_constructible<_Tp> && + assignable_from<_Tp &, _Tp>; +template struct tuple_element; +template class tuple; +template