Commit

Merge pull request #1 from KernelTuner/dev
Rewrite core structure
stijnh authored Sep 21, 2023
2 parents 8026c7f + 46d598c commit 9ed71cb
Showing 40 changed files with 6,966 additions and 4,111 deletions.
47 changes: 47 additions & 0 deletions .github/workflows/cmake-action.yml
@@ -0,0 +1,47 @@
name: CMake

on:
  workflow_call:
    inputs:
      cuda-version:
        required: true
        type: string

env:
  # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
  BUILD_TYPE: Debug

jobs:
  build:
    # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac.
    # You can convert this to a matrix build if you need cross-platform coverage.
    # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
    runs-on: ubuntu-latest

    steps:
    - uses: Jimver/cuda-toolkit@…
      id: cuda-toolkit
      with:
        method: network
        sub-packages: '["nvcc"]'
        cuda: ${{ inputs.cuda-version }}

    - uses: actions/checkout@v3
      with:
        submodules: 'true'

    - name: Configure CMake
      # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
      # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DKERNEL_FLOAT_BUILD_TEST=1 -DKERNEL_FLOAT_BUILD_EXAMPLE=1

    - name: Build
      # Build your program with the given configuration
      run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}

    - name: Test
      working-directory: ${{github.workspace}}/build
      # Execute tests defined by the CMake configuration.
      # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail
      run: ./tests/kernel_float_tests --durations=yes --success --verbosity=high ~[GPU]

28 changes: 28 additions & 0 deletions .github/workflows/cmake.yml
@@ -0,0 +1,28 @@
name: CMake

on:
  push:
  pull_request:
    branches: [ "main" ]

env:
  # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
  BUILD_TYPE: Debug

jobs:
  build-cuda:
    uses: ./.github/workflows/cmake-action.yml
    with:
      cuda-version: "12.2.0"

  build-cuda-11-7:
    needs: build-cuda
    uses: ./.github/workflows/cmake-action.yml
    with:
      cuda-version: "11.7.0"

  build-cuda-12-0:
    needs: build-cuda
    uses: ./.github/workflows/cmake-action.yml
    with:
      cuda-version: "12.0.0"
21 changes: 20 additions & 1 deletion combine.py
@@ -2,6 +2,24 @@
import subprocess
from datetime import datetime

license_boilerplate = """/*
* Kernel Float: Header-only library for vector types and reduced precision floating-point math.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
"""

directory = "include/kernel_float"
contents = dict()

@@ -28,7 +46,8 @@
except Exception as e:
print(f"warning: {e}")

output = "\n".join([
output = license_boilerplate
output += "\n".join([
"//" + "=" * 80,
"// this file has been auto-generated, do not modify its contents!",
f"// date: {date}",
4 changes: 3 additions & 1 deletion docs/api.rst
@@ -3,8 +3,10 @@ API Reference
.. toctree::
   api/types.rst
   api/primitives.rst
   api/generation.rst
   api/unary_operators.rst
   api/binary_operators.rst
   api/reductions.rst
   api/shuffling.rst
   api/mathematical.rst
   api/conditional.rst

65 changes: 35 additions & 30 deletions docs/build_api.py
@@ -65,51 +65,51 @@ def build_index_page(groups):

return filename

aliases = []
for ty in ["vec", "float", "double", "half", "bfloat16x", ""]:
if ty != "vec":
aliases.append(f"{ty}X")

aliases = ["scalar", "vec"]
for ty in ["vec"]:
for i in range(2, 8 + 1):
aliases.append(f"{ty}{i}")

groups = {
"Types": [
("vector", "vector", "struct"),
("Aliases", [
"unaligned_vec",
"vec",
] + aliases,
"typedef"),
("Aliases", aliases, "typedef"),
],
"Primitives": [
("range", "range()"),
("range", "range(F)"),
"map",
"reduce",
"zip",
"zip_common",
"cast",
"broadcast",
"resize",
"for_each",
],
"Shuffling": [
"convert",
"make_vec",
"into_vector",
"concat",
"swizzle",
"first",
"last",
"reversed",
"rotate_left",
"rotate_right",
"select",
"for_each",
],
"Unary Operators": [
"Generation": [
"range",
"range_like",
"each_index",
"fill",
"fill_like",
"zeros",
"zeros_like",
"ones",
"ones_like",
],
"Shuffling": [
# "concat",
# "swizzle",
# "first",
# "last",
# "reversed",
# "rotate_left",
# "rotate_right",
],
"Unary Operators": [
"negate",
"bit_not",
"logical_not",
@@ -135,21 +135,21 @@ def build_index_page(groups):
("min", "min(L&&, R&&)"),
"nextafter",
"modf",
"pow",
("pow", "pow(L&&, R&&)"),
"remainder",
#"rhypot",
],
"Reductions": [
"sum",
("max", "max(V&&)"),
("min", "min(V&&)"),
("max", "max(const V&)"),
("min", "min(const V&)"),
"product",
"all",
"any",
"count",
],
"Mathematical": [
"abs",
("abs", "abs(const V&)"),
"acos",
"acosh",
"asin",
@@ -166,22 +166,22 @@ def build_index_page(groups):
"erfcinv",
"erfcx",
"erfinv",
"exp",
("exp", "exp(const V&)"),
"exp10",
"exp2",
"fabs",
"floor",
"ilogb",
"lgamma",
"log",
("log", "log(const V&)"),
"log10",
"logb",
"nearbyint",
"normcdf",
"rcbrt",
"sin",
"sinh",
"sqrt",
("sqrt", "sqrt(const V&)"),
"tan",
"tanh",
"tgamma",
@@ -193,6 +193,11 @@
"isinf",
"isnan",
],
"Conditional": [
("where", "where(const C&, const L&, const R&)"),
("where", "where(const C&, const L&)"),
("where", "where(const C&)"),
]
}

build_index_page(groups)
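
The new "Generation" and "Conditional" groups added here cover the vector-construction helpers (range, fill, zeros, ones and their *_like variants) and the per-lane selection overloads of where. As a rough illustration of how these names read in device code (only the function names come from the listing above; the exact signatures, the comparison operator, and the element types are assumptions, not taken from this commit):

#include "kernel_float.h"
namespace kf = kernel_float;

// Hypothetical kernel: replace negative lanes with zero.
__global__ void clamp_negatives(const kf::vec<float, 4>* input, kf::vec<float, 4>* output) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    kf::vec<float, 4> x = input[i];
    kf::vec<float, 4> zero = kf::zeros_like(x);  // "zeros_like" from the Generation group
    output[i] = kf::where(x < zero, zero, x);    // three-argument "where" from the Conditional group
}
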
22 changes: 8 additions & 14 deletions examples/vector_add/main.cu
@@ -4,9 +4,7 @@
#include <vector>

#include "kernel_float.h"
namespace kf = kernel_float;

using x = kf::half;
using namespace kernel_float::prelude;

void cuda_check(cudaError_t code) {
if (code != cudaSuccess) {
@@ -15,11 +13,7 @@ void cuda_check(cudaError_t code) {
}

template<int N>
__global__ void my_kernel(
int length,
const kf::unaligned_vec<__half, N>* input,
double constant,
kf::unaligned_vec<float, N>* output) {
__global__ void my_kernel(int length, const khalf<N>* input, double constant, kfloat<N>* output) {
int i = blockIdx.x * blockDim.x + threadIdx.x;

if (i * N < length) {
@@ -30,24 +24,24 @@ __global__ void my_kernel(
template<int items_per_thread>
void run_kernel(int n) {
double constant = 1.0;
std::vector<__half> input(n);
std::vector<half> input(n);
std::vector<float> output_expected;
std::vector<float> output_result;

// Generate input data
for (int i = 0; i < n; i++) {
input[i] = __half(i);
input[i] = half(i);
output_expected[i] = float(i + constant);
}

// Allocate device memory
kf::unaligned_vec<__half, items_per_thread>* input_dev;
kf::unaligned_vec<float, items_per_thread>* output_dev;
cuda_check(cudaMalloc(&input_dev, sizeof(__half) * n));
khalf<items_per_thread>* input_dev;
kfloat<items_per_thread>* output_dev;
cuda_check(cudaMalloc(&input_dev, sizeof(half) * n));
cuda_check(cudaMalloc(&output_dev, sizeof(float) * n));

// Copy device memory
cuda_check(cudaMemcpy(input_dev, input.data(), sizeof(__half) * n, cudaMemcpyDefault));
cuda_check(cudaMemcpy(input_dev, input.data(), sizeof(half) * n, cudaMemcpyDefault));

// Launch kernel!
int block_size = 256;
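
The launch and verification code at the end of run_kernel is truncated in this view. For orientation, here is a minimal sketch of how a launch of my_kernel with the block size above typically looks; the grid-size arithmetic, the synchronization, and the copy-back are assumptions rather than the file's actual contents:

// Sketch only: each thread handles items_per_thread elements, so the grid needs
// ceil(n / (block_size * items_per_thread)) blocks to cover the input.
int num_blocks = (n + block_size * items_per_thread - 1) / (block_size * items_per_thread);
my_kernel<items_per_thread><<<num_blocks, block_size>>>(n, input_dev, constant, output_dev);
cuda_check(cudaDeviceSynchronize());

// Copy the result back for comparison against output_expected (assumed follow-up step).
output_result.resize(n);
cuda_check(cudaMemcpy(output_result.data(), output_dev, sizeof(float) * n, cudaMemcpyDefault));
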
12 changes: 6 additions & 6 deletions include/kernel_float.h
@@ -1,18 +1,18 @@
#ifndef KERNEL_FLOAT_H
#define KERNEL_FLOAT_H

#include "kernel_float/base.h"
#include "kernel_float/bf16.h"
#include "kernel_float/binops.h"
#include "kernel_float/cast.h"
#include "kernel_float/conversion.h"
#include "kernel_float/fp16.h"
#include "kernel_float/fp8.h"
#include "kernel_float/interface.h"
#include "kernel_float/iterate.h"
#include "kernel_float/macros.h"
#include "kernel_float/meta.h"
#include "kernel_float/prelude.h"
#include "kernel_float/reduce.h"
#include "kernel_float/storage.h"
#include "kernel_float/swizzle.h"
#include "kernel_float/triops.h"
#include "kernel_float/unops.h"
#include "kernel_float/vector.h"

#endif
#endif
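
With the reorganized component headers, a consumer still includes just this one umbrella header. A minimal usage sketch (illustrative only: the kf::vec<float, 4> type comes from the vector and alias listings earlier in this commit, and the element-wise operator+ is assumed from the Binary Operators group rather than copied from the repository):

#include "kernel_float.h"
namespace kf = kernel_float;

// Hypothetical kernel: per thread, add two length-4 float vectors element-wise.
__global__ void add4(int n, const kf::vec<float, 4>* a, const kf::vec<float, 4>* b, kf::vec<float, 4>* out) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i * 4 < n) {
        out[i] = a[i] + b[i];
    }
}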
(Diffs for the remaining 33 changed files are not shown here.)