diff --git a/.github/workflows/upload_indexhtml.yml b/.github/workflows/upload_indexhtml.yml
index 15ad636b..7db24563 100644
--- a/.github/workflows/upload_indexhtml.yml
+++ b/.github/workflows/upload_indexhtml.yml
@@ -4,7 +4,7 @@ name: Deploy static content to Pages
 on:
   # Runs on pushes targeting the default branch
   push:
-    branches: ["main", "primitive_table_vis"]
+    branches: ["main"]
 
   # Allows you to run this workflow manually from the Actions tab
   workflow_dispatch:
@@ -31,6 +31,33 @@ jobs:
     steps:
       - name: Checkout
         uses: actions/checkout@v3
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+      -
+        name: 'Install packages for generator'
+        run: |
+          sudo apt-get update
+          sudo apt-get install graphviz-dev util-linux
+      -
+        name: Set up Python 3.11
+        uses: actions/setup-python@v4
+        with:
+          # Quoted so YAML keeps the version a string instead of parsing it as a number.
+          python-version: "3.11"
+      -
+        name: 'Install python dependencies for generator'
+        run: |
+          python -m pip install --upgrade pip
+          pip3 install ruff
+          if [ -f requirements.txt ]; then pip3 install -r requirements.txt; fi
+      -
+        # Version is spelled out to match the setup-python step above (no strategy matrix is visible for this job).
+        name: Try generating the whole TSL with python 3.11
+        id: generate
+        # A failure of this step does not stop the following Pages steps from running.
+        continue-on-error: true
+        run: |
+          python main.py --print-outputs-only
       - name: Setup Pages
         uses: actions/configure-pages@v3
       - name: Upload artifact
diff --git a/doc/media/tsl_logo_small.png b/doc/media/tsl_logo_small.png
new file mode 100755
index 00000000..8bcd9bc9
Binary files /dev/null and b/doc/media/tsl_logo_small.png differ
diff --git a/generator/config/default_conf.yaml b/generator/config/default_conf.yaml
index 4e5edd73..071caff2 100644
--- a/generator/config/default_conf.yaml
+++ b/generator/config/default_conf.yaml
@@ -23,6 +23,10 @@ configuration:
   primitive_definitions: "definitions"
   silent_warnings: ["-Wno-ignored-attributes", "-Wno-attributes"]
   expansions:
+    primitive_vis:
+      enabled: True
+      template_path: "generator/config/generator/tsl_templates/expansions/primitive_table.template"
+      target_path: "./index.html"
     cmake:
       enabled: True
       minimum_version: "3.13"
diff --git a/generator/config/generator/tsl_templates/expansions/primitive_table.template b/generator/config/generator/tsl_templates/expansions/primitive_table.template
new file mode 100755
index 00000000..10b6802d
--- /dev/null
+++ b/generator/config/generator/tsl_templates/expansions/primitive_table.template
@@ -0,0 +1,226 @@
+
+
+
+ + + + +Brief: Packs elements from a vector together using a fixed bitwidth.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | - | - | - | - | + | + | - | - |
int8_t | - | - | - | - | + | + | - | - |
uint16_t | - | - | - | - | + | + | - | - |
int16_t | - | - | - | - | + | + | - | - |
uint32_t | - | - | - | - | + | + | - | - |
int32_t | - | - | - | - | + | + | - | - |
uint64_t | - | - | - | - | + | + | - | - |
int64_t | - | - | - | - | + | + | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Packs elements from a vector together using a fixed bitwidth.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | - | - | - | - | + | + | - | - |
int8_t | - | - | - | - | + | + | - | - |
uint16_t | - | - | - | - | + | + | - | - |
int16_t | - | - | - | - | + | + | - | - |
uint32_t | - | - | - | - | + | + | - | - |
int32_t | - | - | - | - | + | + | - | - |
uint64_t | - | - | - | - | + | + | - | - |
int64_t | - | - | - | - | + | + | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief:
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | - | - | - | - | + | + | - | - |
int8_t | - | - | - | - | + | + | - | - |
uint16_t | - | - | - | - | + | + | - | - |
int16_t | - | - | - | - | + | + | - | - |
uint32_t | - | - | - | - | + | + | - | - |
int32_t | - | - | - | - | + | + | - | - |
uint64_t | - | - | - | - | + | + | - | - |
int64_t | - | - | - | - | + | + | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief:
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | - | - | - | - | + | + | - | - |
int8_t | - | - | - | - | + | + | - | - |
uint16_t | - | - | - | - | + | + | - | - |
int16_t | - | - | - | - | + | + | - | - |
uint32_t | - | - | - | - | + | + | - | - |
int32_t | - | - | - | - | + | + | - | - |
uint64_t | - | - | - | - | + | + | - | - |
int64_t | - | - | - | - | + | + | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Adds two vector registers.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | + | + | + | + | + | + |
int8_t | + | + | + | + | + | + | + | + |
uint16_t | + | + | + | + | + | + | + | + |
int16_t | + | + | + | + | + | + | + | + |
uint32_t | + | + | + | + | + | + | + | + |
int32_t | + | + | + | + | + | + | + | + |
uint64_t | + | + | + | + | + | + | + | + |
int64_t | + | + | + | + | + | + | + | + |
float | + | + | + | + | + | + | + | + |
double | + | + | + | + | + | + | + | + |
Brief: Subtracts two vector registers.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | + | + | + | + | + | + |
int8_t | + | + | + | + | + | + | + | + |
uint16_t | + | + | + | + | + | + | + | + |
int16_t | + | + | + | + | + | + | + | + |
uint32_t | + | + | + | + | + | + | + | + |
int32_t | + | + | + | + | + | + | + | + |
uint64_t | + | + | + | + | + | + | + | + |
int64_t | + | + | + | + | + | + | + | + |
float | + | + | + | + | + | + | + | + |
double | + | + | + | + | + | + | + | + |
Brief: Adds two vector registers, depending on a mask: result[*] = (m[*])? vec_a[*]+vec_b[*] : vec_a[*].
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | - | - | - | - | - | + | + |
int8_t | + | - | - | - | - | - | + | + |
uint16_t | + | - | - | - | - | - | + | + |
int16_t | + | - | - | - | - | - | + | + |
uint32_t | + | - | - | - | - | - | + | + |
int32_t | + | - | - | - | - | - | + | + |
uint64_t | + | - | - | - | - | - | + | + |
int64_t | + | - | - | - | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief: Multiplies two vector registers.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Reduces the elements to a sum.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Compares the values of two vectors and returns a vector with the minimum of each pair of corresponding values.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | - | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | - | + | + | + | - |
int64_t | + | + | - | - | + | + | + | - |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Divides two vector registers.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | - | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | - | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | - | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | - | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Applies the modulo operation to a data vector, using one input value as the divisor.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | + | + | + | + |
int8_t | + | + | - | - | + | + | + | + |
uint16_t | + | + | - | - | + | + | + | + |
int16_t | + | + | - | - | + | + | + | + |
uint32_t | + | + | - | - | + | + | + | + |
int32_t | + | + | - | - | + | + | + | + |
uint64_t | + | + | - | - | + | + | + | + |
int64_t | + | + | - | - | + | + | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Reduces the elements to the maximum value.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | - | - | - | - | + | + | - | - |
int8_t | - | - | - | - | + | + | - | - |
uint16_t | - | - | - | - | + | + | - | - |
int16_t | - | - | - | - | + | + | - | - |
uint32_t | - | - | - | - | + | + | - | - |
int32_t | - | - | - | - | + | + | - | - |
uint64_t | - | - | - | - | + | + | - | - |
int64_t | - | - | - | - | + | + | - | - |
float | - | - | - | - | + | + | - | - |
double | - | - | - | - | + | + | - | - |
Brief: Reduces the elements to the maximum value.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | - | - | - | - | + | + | - | - |
int8_t | - | - | - | - | + | + | - | - |
uint16_t | - | - | - | - | + | + | - | - |
int16_t | - | - | - | - | + | + | - | - |
uint32_t | - | - | - | - | + | + | - | - |
int32_t | - | - | - | - | + | + | - | - |
uint64_t | - | - | - | - | + | + | - | - |
int64_t | - | - | - | - | + | + | - | - |
float | - | - | - | - | + | + | - | - |
double | - | - | - | - | + | + | - | - |
Brief: Forms an integral value from the most significant bits of every lane in a vector mask register.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | + | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief: Forms a vector register from an integral, where all bits are set in a lane if the corresponding mask bit is set to 1.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | + | + | - | + |
int8_t | + | + | - | - | + | + | - | + |
uint16_t | + | + | - | - | + | + | - | + |
int16_t | + | + | - | - | + | + | - | + |
uint32_t | + | + | - | - | + | + | - | + |
int32_t | + | + | - | - | + | + | - | + |
uint64_t | + | + | - | - | + | + | - | + |
int64_t | + | + | - | + | + | + | - | + |
float | + | + | - | - | + | + | - | + |
double | + | + | - | - | + | + | - | + |
Brief: Forms a mask type from an integral.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief: Binary NOT of a vector integral mask type.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | - | - |
int8_t | + | + | - | - | - | - | - | - |
uint16_t | + | + | - | - | - | - | - | - |
int16_t | + | + | - | - | - | - | - | - |
uint32_t | + | + | - | - | - | - | - | - |
int32_t | + | + | - | - | - | - | - | - |
uint64_t | + | + | - | - | - | - | - | - |
int64_t | + | + | - | - | - | - | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Binary NOT of a vector mask type.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | - | - |
int8_t | + | + | - | - | - | - | - | - |
uint16_t | + | + | - | - | - | - | - | - |
int16_t | + | + | - | - | - | - | - | - |
uint32_t | + | + | - | - | - | - | - | - |
int32_t | + | + | - | - | - | - | - | - |
uint64_t | + | + | - | - | - | - | - | - |
int64_t | + | + | - | - | - | - | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Binary AND of two vector mask types.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | + | - | - | - | + |
int8_t | + | + | - | + | - | - | - | + |
uint16_t | + | + | - | + | - | - | - | + |
int16_t | + | + | - | + | - | - | - | + |
uint32_t | + | + | - | + | - | - | - | + |
int32_t | + | + | - | + | - | - | - | + |
uint64_t | + | + | - | + | - | - | - | + |
int64_t | + | + | - | + | - | - | - | + |
float | + | + | - | + | - | - | - | + |
double | + | + | - | + | - | - | - | + |
Brief: Binary AND of two vector integral mask types.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | - | - | + | - | - | - | + |
int8_t | + | - | - | + | - | - | - | + |
uint16_t | + | - | - | + | - | - | - | + |
int16_t | + | - | - | + | - | - | - | + |
uint32_t | + | - | - | + | - | - | - | + |
int32_t | + | - | - | + | - | - | - | + |
uint64_t | + | - | - | + | - | - | - | + |
int64_t | + | - | - | + | - | - | - | + |
float | + | - | - | + | - | - | - | + |
double | + | - | - | + | - | - | - | + |
Brief: Binary OR of two vector mask types.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | + | - | - | - | + |
int8_t | + | + | - | + | - | - | - | + |
uint16_t | + | + | - | + | - | - | - | + |
int16_t | + | + | - | + | - | - | - | + |
uint32_t | + | + | - | + | - | - | - | + |
int32_t | + | + | - | + | - | - | - | + |
uint64_t | + | + | - | + | - | - | - | + |
int64_t | + | + | - | + | - | - | - | + |
float | + | + | - | + | - | - | - | + |
double | + | + | - | + | - | - | - | + |
Brief: Binary OR of two vector integral mask types.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | - | - | + | - | - | - | + |
int8_t | + | - | - | + | - | - | - | + |
uint16_t | + | - | - | + | - | - | - | + |
int16_t | + | - | - | + | - | - | - | + |
uint32_t | + | - | - | + | - | - | - | + |
int32_t | + | - | - | + | - | - | - | + |
uint64_t | + | - | - | + | - | - | - | + |
int64_t | + | - | - | + | - | - | - | + |
float | + | - | - | + | - | - | - | + |
double | + | - | - | + | - | - | - | + |
Brief: Binary XOR of two vector mask types.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | + | - | - | - | + |
int8_t | + | + | - | + | - | - | - | + |
uint16_t | + | + | - | + | - | - | - | + |
int16_t | + | + | - | + | - | - | - | + |
uint32_t | + | + | - | + | - | - | - | + |
int32_t | + | + | - | + | - | - | - | + |
uint64_t | + | + | - | + | - | - | - | + |
int64_t | + | + | - | + | - | - | - | + |
float | + | + | - | + | - | - | - | + |
double | + | + | - | + | - | - | - | + |
Brief: Binary XOR of two vector integral mask types.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | - | - | + | - | - | - | + |
int8_t | + | - | - | + | - | - | - | + |
uint16_t | + | - | - | + | - | - | - | + |
int16_t | + | - | - | + | - | - | - | + |
uint32_t | + | - | - | + | - | - | - | + |
int32_t | + | - | - | + | - | - | - | + |
uint64_t | + | - | - | + | - | - | - | + |
int64_t | + | - | - | + | - | - | - | + |
float | + | - | - | + | - | - | - | + |
double | + | - | - | + | - | - | - | + |
Brief:
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief:
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief:
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief: Loads data from memory to a mask.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | - | + |
int8_t | + | + | - | - | - | - | - | + |
uint16_t | + | + | - | - | - | - | - | + |
int16_t | + | + | - | - | - | - | - | + |
uint32_t | + | + | - | - | - | - | - | + |
int32_t | + | + | - | - | - | - | - | + |
uint64_t | + | + | - | - | - | - | - | + |
int64_t | + | + | - | - | - | - | - | + |
float | + | + | - | - | - | - | - | + |
double | + | + | - | - | - | - | - | + |
Brief: Compares two vector registers for equality.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | - |
double | + | + | - | + | + | + | + | - |
Brief: Compares two vector registers for equality.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | - | - | - | - | - | - | - |
int8_t | + | - | - | - | - | - | - | - |
uint16_t | + | - | - | - | - | - | - | - |
int16_t | + | - | - | - | - | - | - | - |
uint32_t | + | - | - | - | - | - | - | - |
int32_t | + | - | - | - | - | - | - | - |
uint64_t | + | - | - | - | - | - | - | - |
int64_t | + | - | - | - | - | - | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Compares two vector registers for inequality.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | + | + | + | + |
int8_t | + | + | - | - | + | + | + | + |
uint16_t | + | + | - | - | + | + | + | + |
int16_t | + | + | - | - | + | + | + | + |
uint32_t | + | + | - | - | + | + | + | + |
int32_t | + | + | - | - | + | + | + | + |
uint64_t | + | + | - | - | + | + | + | + |
int64_t | + | + | - | - | + | + | + | + |
float | + | + | - | - | + | + | + | - |
double | + | + | - | - | + | + | + | - |
Brief: Checks if the values of a vector are in a specific range (min[*] <= d[*] <= max[*]).
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | - | - | - | - | + | + | + | - |
int8_t | - | - | - | - | + | + | + | - |
uint16_t | - | - | - | - | + | + | + | - |
int16_t | - | - | - | - | + | + | + | - |
uint32_t | - | - | - | - | + | + | + | - |
int32_t | - | - | - | - | + | + | + | - |
uint64_t | - | - | - | - | + | + | + | - |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | - | + | + | + | + |
double | + | + | - | - | + | + | + | + |
Brief: Tests whether left elements are smaller than the corresponding right ones.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Tests whether left elements are larger than or equal to the corresponding right ones.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Tests whether left elements are smaller than or equal to the corresponding right ones.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Tests whether left elements are larger than the corresponding right ones.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Checks if the vector register contains at least one value not equal to zero.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief: Counts number of matches of a chosen value within a vector register.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief: Binary ANDs two vector registers.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | - | - | + | + |
double | + | + | - | + | - | - | + | + |
Brief: Binary ORs two vector registers.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | - | - | + | + |
double | + | + | - | + | - | - | + | + |
Brief: Binary XORs two vector registers.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | - | - | + | + |
double | + | + | - | + | - | - | + | + |
Brief: Arithmetic shift of data to the left by n bits.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | - | - | - | + | + | + | + | - |
int8_t | - | - | - | + | + | + | + | - |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | - | - | - | + | - | - | - | - |
double | - | - | - | + | - | - | - | - |
Brief: Shifts data to left by n bits (shifting in 0).
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | - | - | - | + | + | + | + | - |
int8_t | - | - | - | + | + | + | + | - |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Arithmetic shift of data to the right by n bits.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | - | - | - | + | + | + | + | - |
int8_t | - | - | - | + | + | + | + | - |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Arithmetic shift of data to the right by n bits.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | - | - | - | + | + | + | + | - |
int8_t | - | - | - | + | + | + | + | - |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Leading zeros counter.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | - | - | - | - | + | - | - | - |
int8_t | - | - | - | - | + | - | - | - |
uint16_t | - | - | - | - | + | - | - | - |
int16_t | - | - | - | - | + | - | - | - |
uint32_t | + | + | - | - | + | + | - | + |
int32_t | + | + | - | - | + | + | - | + |
uint64_t | + | + | - | - | + | - | - | + |
int64_t | + | + | - | - | + | - | - | + |
float | - | - | - | - | + | + | - | - |
double | - | - | - | - | + | - | - | - |
Brief:
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Performs a horizontal OR on a vector register.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | + | + | + | + |
int8_t | + | + | - | - | + | + | + | + |
uint16_t | + | + | - | - | + | + | + | + |
int16_t | + | + | - | - | + | + | + | + |
uint32_t | + | + | - | - | + | + | + | + |
int32_t | + | + | - | - | + | + | + | + |
uint64_t | + | + | - | - | + | + | + | + |
int64_t | + | + | - | - | + | + | + | + |
float | + | + | - | - | + | + | + | + |
double | + | + | - | - | + | + | + | + |
Brief: Bitwise inversion of the values in a vector register.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | + | + | + | + |
int8_t | + | + | - | - | + | + | + | + |
uint16_t | + | + | - | - | + | + | + | + |
int16_t | + | + | - | - | + | + | + | + |
uint32_t | + | + | - | - | + | + | + | + |
int32_t | + | + | - | - | + | + | + | + |
uint64_t | + | + | - | - | + | + | + | + |
int64_t | + | + | - | - | + | + | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief:
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | + | + | - | - | + | + | + | + |
double | + | + | - | - | + | + | + | + |
Brief:
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | - | - | - | - | - | - | + |
int8_t | + | - | - | - | - | - | - | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief:
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | - | - | - | - | - | - | - | - |
int8_t | - | - | - | - | - | - | - | - |
uint16_t | - | - | - | - | - | - | - | - |
int16_t | - | - | - | - | - | - | - | - |
uint32_t | - | + | - | - | - | - | - | - |
int32_t | - | + | - | - | - | - | - | - |
uint64_t | - | - | - | - | - | - | - | - |
int64_t | - | - | - | - | - | - | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief:
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | - | - | - | - | - | - | - | - |
int8_t | - | - | - | - | - | - | - | - |
uint16_t | - | - | - | - | - | - | - | - |
int16_t | - | - | - | - | - | - | - | - |
uint32_t | + | - | - | - | - | - | - | - |
int32_t | + | - | - | - | - | - | - | - |
uint64_t | - | - | - | - | - | - | - | - |
int64_t | - | - | - | - | - | - | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief:
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | - | - | - | - | - | - | + |
int8_t | + | - | - | - | - | - | - | + |
uint16_t | + | - | - | - | - | - | - | + |
int16_t | + | - | - | - | - | - | - | + |
uint32_t | + | - | - | - | - | - | - | + |
int32_t | + | - | - | - | - | - | - | + |
uint64_t | + | - | - | - | - | - | - | - |
int64_t | + | - | - | - | - | - | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief:
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | - | - | - | - | - | - | - | - |
int8_t | - | - | - | - | - | - | - | - |
uint16_t | - | - | - | - | - | - | - | - |
int16_t | - | - | - | - | - | - | - | - |
uint32_t | + | - | - | - | - | - | - | - |
int32_t | + | - | - | - | - | - | - | - |
uint64_t | + | - | - | - | - | - | - | - |
int64_t | + | - | - | - | - | - | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Loads data from aligned memory into a vector register.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | - | + |
int8_t | + | + | - | - | - | - | - | + |
uint16_t | + | + | - | - | - | - | - | + |
int16_t | + | + | - | - | - | - | - | + |
uint32_t | + | + | - | - | - | - | - | + |
int32_t | + | + | - | - | - | - | - | + |
uint64_t | + | + | - | - | - | - | - | + |
int64_t | + | + | - | + | - | - | - | + |
float | + | + | - | - | - | - | - | + |
double | + | + | - | - | - | - | - | + |
Brief: Allocates (unaligned) contiguous memory.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief: Allocates aligned contiguous memory.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief: Deallocates (possibly aligned) contiguous memory.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief: Copy memory.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief: Checks whether all elements are unique in a register.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Checks whether all elements are unique in a register and returns a mask indicating which elements don't have preceding conflicts.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Blends two registers using provided bitmask.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | - | + |
int8_t | + | + | - | - | - | - | - | + |
uint16_t | + | + | - | - | - | - | - | + |
int16_t | + | + | - | - | - | - | - | + |
uint32_t | + | + | - | - | - | - | - | + |
int32_t | + | + | - | - | - | - | - | + |
uint64_t | + | + | - | - | - | - | - | + |
int64_t | + | + | - | - | - | - | - | + |
float | + | + | - | - | - | - | - | + |
double | + | + | - | - | - | - | - | + |
Brief: Blends or adds two registers using provided bitmask
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | - | - |
int8_t | + | + | - | - | - | - | - | - |
uint16_t | + | + | - | - | - | - | - | - |
int16_t | + | + | - | - | - | - | - | - |
uint32_t | + | + | - | - | - | - | - | - |
int32_t | + | + | - | - | - | - | - | - |
uint64_t | + | + | - | - | - | - | - | - |
int64_t | + | + | - | - | - | - | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Returns a vector register with undefined data inside.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | - | + |
int8_t | + | + | - | - | - | - | - | + |
uint16_t | + | + | - | - | - | - | - | + |
int16_t | + | + | - | - | - | - | - | + |
uint32_t | + | + | - | - | - | - | - | + |
int32_t | + | + | - | - | - | - | - | + |
uint64_t | + | + | - | - | - | - | - | + |
int64_t | + | + | - | - | - | - | - | + |
float | + | + | - | - | - | - | - | + |
double | + | + | - | - | - | - | - | + |
Brief: Loads data from aligned memory into a vector register.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | + | + | + | + |
int8_t | + | + | - | - | + | + | + | + |
uint16_t | + | + | - | - | + | + | + | + |
int16_t | + | + | - | - | + | + | + | + |
uint32_t | + | + | - | - | + | + | + | + |
int32_t | + | + | - | - | + | + | + | + |
uint64_t | + | + | - | - | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | - | + | + | + | + |
double | + | + | - | - | + | + | + | + |
Brief: Loads data from (un)aligned memory into a vector register.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Stores data from a vector register to aligned memory.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | + | + | + | + |
int8_t | + | + | - | - | + | + | + | + |
uint16_t | + | + | - | - | + | + | + | + |
int16_t | + | + | - | - | + | + | + | + |
uint32_t | + | + | - | - | + | + | + | + |
int32_t | + | + | - | - | + | + | + | + |
uint64_t | + | + | - | - | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | - | + | + | + | + |
double | + | + | - | - | + | + | + | + |
Brief: Stores data from a vector register to (un)aligned memory.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | + | + | + | + |
int8_t | + | + | - | - | + | + | + | + |
uint16_t | + | + | - | - | + | + | + | + |
int16_t | + | + | - | - | + | + | + | + |
uint32_t | + | + | - | - | + | + | + | + |
int32_t | + | + | - | - | + | + | + | + |
uint64_t | + | + | - | - | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | - | + | + | + | + |
double | + | + | - | - | + | + | + | + |
Brief: Stores SIMD register to array.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief: Broadcasts a single value into all lanes of a vector register.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Set all lanes to zero.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | + | + | + | + |
int8_t | + | + | - | - | + | + | + | + |
uint16_t | + | + | - | - | + | + | + | + |
int16_t | + | + | - | - | + | + | + | + |
uint32_t | + | + | - | - | + | + | + | + |
int32_t | + | + | - | - | + | + | + | + |
uint64_t | + | + | - | - | + | + | + | + |
int64_t | + | + | - | - | + | + | + | + |
float | + | + | - | - | + | + | + | + |
double | + | + | - | - | + | + | + | + |
Brief: Transfers provided elements into a vector register.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief: Creates a sequence [0..SIMD-Reg-Element-Count].
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief: Creates a sequence.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief: Transfers data from arbitrary locations into a vector register.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | - | - | - | - | - | - | + | + |
int8_t | - | - | - | - | - | - | + | + |
uint16_t | - | - | - | - | - | - | + | + |
int16_t | - | - | - | - | - | - | + | + |
uint32_t | - | + | - | - | - | - | + | + |
int32_t | - | + | - | - | - | - | + | + |
uint64_t | - | + | - | - | - | - | + | + |
int64_t | - | + | - | + | - | - | + | + |
float | - | + | - | - | - | - | + | + |
double | - | + | - | - | - | - | + | + |
Brief: If mask[i] is 1, load memory[index[i] * scale], otherwise use source[i]
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | - | - | - | - | - | - | + | + |
int8_t | - | - | - | - | - | - | + | + |
uint16_t | - | - | - | - | - | - | + | + |
int16_t | - | - | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | + | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief: Transfers data from a vector register to arbitrary locations.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | - | - | + | - | - | + | + |
int8_t | + | - | - | + | - | - | + | + |
uint16_t | + | - | - | + | - | - | + | + |
int16_t | + | - | - | + | - | - | + | + |
uint32_t | + | + | - | + | - | - | + | + |
int32_t | + | + | - | + | - | - | + | + |
uint64_t | + | + | - | + | - | - | + | + |
int64_t | + | + | - | + | - | - | + | + |
float | + | + | - | + | - | - | + | + |
double | + | + | - | + | - | - | + | + |
Brief: Transfers data from a vector register to arbitrary locations.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | - | - | + | - | - | + | + |
int8_t | + | - | - | + | - | - | + | + |
uint16_t | + | - | - | + | - | - | + | + |
int16_t | + | - | - | + | - | - | + | + |
uint32_t | + | + | - | + | - | - | + | + |
int32_t | + | + | - | + | - | - | + | + |
uint64_t | + | + | - | + | - | - | + | + |
int64_t | + | + | - | + | - | - | + | + |
float | + | + | - | + | - | - | + | + |
double | + | + | - | + | - | - | + | + |
Brief: Stores elements from data consecutively, if the corresponding bit in mask is set to 1.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief: Loads contiguous data from a specified memory location and puts the elements using write mask.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | - | - |
int8_t | + | + | - | - | - | - | - | - |
uint16_t | + | + | - | - | - | - | - | - |
int16_t | + | + | - | - | - | - | - | - |
uint32_t | + | + | - | - | - | - | - | - |
int32_t | + | + | - | - | - | - | - | - |
uint64_t | + | + | - | - | - | - | - | - |
int64_t | + | + | - | - | - | - | - | - |
float | + | + | - | - | - | - | - | - |
double | + | + | - | - | - | - | - | - |
Brief:
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | - | - | - | - | - | - | - |
int8_t | + | - | - | - | - | - | - | - |
uint16_t | + | - | - | - | - | - | - | - |
int16_t | + | - | - | - | - | - | - | - |
uint32_t | + | - | - | - | - | - | - | - |
int32_t | + | - | - | - | - | - | - | - |
uint64_t | - | - | - | - | - | - | - | - |
int64_t | - | - | - | - | - | - | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Partially override a Vector with a single value.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief: Copy elements from a vector, where the mask bit is set, otherwise write zero
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief: Merge two vectors while picking the source of each element based on the corresponding mask bit
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief: Extracts value on given index.
avx2 | avx512 | cuda | neon | oneAPIfpga | oneAPIfpgaRTL | scalar | sse | |
uint8_t | + | + | - | - | - | - | + | + |
int8_t | + | + | - | - | - | - | + | + |
uint16_t | + | + | - | - | - | - | + | + |
int16_t | + | + | - | - | - | - | + | + |
uint32_t | + | + | - | - | - | - | + | + |
int32_t | + | + | - | - | - | - | + | + |
uint64_t | + | + | - | - | - | - | + | + |
int64_t | + | + | - | - | - | - | + | + |
float | + | + | - | - | - | - | + | + |
double | + | + | - | - | - | - | + | + |
Brief: {self.description}