Skip to content

Commit

Permalink
Adding notebook that shows that we can run a kernel on the GPU.
Browse files Browse the repository at this point in the history
  • Loading branch information
drewoldag committed Jun 13, 2024
1 parent 53f3f4c commit 7cfb2ca
Show file tree
Hide file tree
Showing 2 changed files with 254 additions and 84 deletions.
254 changes: 254 additions & 0 deletions docs/notebooks/example-cuda-python.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,254 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This example works through the Overview section of the CUDA Python documentation.\n",
"\n",
"https://nvidia.github.io/cuda-python/overview.html"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cuda import cuda, nvrtc\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def ASSERT_DRV(err):\n",
" if isinstance(err, cuda.CUresult):\n",
" if err != cuda.CUresult.CUDA_SUCCESS:\n",
" raise RuntimeError(\"Cuda Error: {}\".format(err))\n",
" elif isinstance(err, nvrtc.nvrtcResult):\n",
" if err != nvrtc.nvrtcResult.NVRTC_SUCCESS:\n",
" raise RuntimeError(\"Nvrtc Error: {}\".format(err))\n",
" else:\n",
" raise RuntimeError(\"Unknown error type: {}\".format(err))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"saxpy = \"\"\"\\\n",
"extern \"C\" __global__\n",
"void saxpy(float a, float *x, float *y, float *out, size_t n)\n",
"{\n",
" size_t tid = blockIdx.x * blockDim.x + threadIdx.x;\n",
" if (tid < n) {\n",
" out[tid] = a * x[tid] + y[tid];\n",
" }\n",
"}\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Create program\n",
"err, prog = nvrtc.nvrtcCreateProgram(str.encode(saxpy), b\"saxpy.cu\", 0, [], [])\n",
"\n",
"# Compile program\n",
"opts = [b\"--fmad=false\", b\"--gpu-architecture=compute_75\"]\n",
"err, = nvrtc.nvrtcCompileProgram(prog, 2, opts)\n",
"\n",
"# Get PTX from compilation\n",
"err, ptxSize = nvrtc.nvrtcGetPTXSize(prog)\n",
"ptx = b\" \" * ptxSize\n",
"err, = nvrtc.nvrtcGetPTX(prog, ptx)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Initialize CUDA Driver API\n",
"err, = cuda.cuInit(0)\n",
"\n",
"# Retrieve handle for device 0\n",
"err, cuDevice = cuda.cuDeviceGet(0)\n",
"\n",
"# Create context\n",
"err, context = cuda.cuCtxCreate(0, cuDevice)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load PTX as module data and retrieve function\n",
"ptx = np.char.array(ptx)\n",
"# Note: Incompatible --gpu-architecture would be detected here\n",
"err, module = cuda.cuModuleLoadData(ptx.ctypes.data)\n",
"ASSERT_DRV(err)\n",
"err, kernel = cuda.cuModuleGetFunction(module, b\"saxpy\")\n",
"ASSERT_DRV(err)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"NUM_THREADS = 512 # Threads per block\n",
"NUM_BLOCKS = 32768 # Blocks per grid\n",
"\n",
"a = np.array([2.0], dtype=np.float32)51515\n",
"n = np.array(NUM_THREADS * NUM_BLOCKS, dtype=np.uint32)\n",
"bufferSize = n * a.itemsize\n",
"\n",
"hX = np.random.rand(n).astype(dtype=np.float32)\n",
"hY = np.random.rand(n).astype(dtype=np.float32)\n",
"hOut = np.zeros(n).astype(dtype=np.float32)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"err, dXclass = cuda.cuMemAlloc(bufferSize)\n",
"err, dYclass = cuda.cuMemAlloc(bufferSize)\n",
"err, dOutclass = cuda.cuMemAlloc(bufferSize)\n",
"\n",
"err, stream = cuda.cuStreamCreate(0)\n",
"\n",
"err, = cuda.cuMemcpyHtoDAsync(\n",
" dXclass, hX.ctypes.data, bufferSize, stream\n",
")\n",
"err, = cuda.cuMemcpyHtoDAsync(\n",
" dYclass, hY.ctypes.data, bufferSize, stream\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The following code example is not intuitive \n",
"# Subject to change in a future release\n",
"dX = np.array([int(dXclass)], dtype=np.uint64)\n",
"dY = np.array([int(dYclass)], dtype=np.uint64)\n",
"dOut = np.array([int(dOutclass)], dtype=np.uint64)\n",
"\n",
"args = [a, dX, dY, dOut, n]\n",
"args = np.array([arg.ctypes.data for arg in args], dtype=np.uint64)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"err, = cuda.cuLaunchKernel(\n",
" kernel,\n",
" NUM_BLOCKS, # grid x dim\n",
" 1, # grid y dim\n",
" 1, # grid z dim\n",
" NUM_THREADS, # block x dim\n",
" 1, # block y dim\n",
" 1, # block z dim\n",
" 0, # dynamic shared memory\n",
" stream, # stream\n",
" args.ctypes.data, # kernel arguments\n",
" 0, # extra (ignore)\n",
")\n",
"\n",
"err, = cuda.cuMemcpyDtoHAsync(\n",
" hOut.ctypes.data, dOutclass, bufferSize, stream\n",
")\n",
"err, = cuda.cuStreamSynchronize(stream)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The results of running Saxpy on the GPU are stored in the `hOut` array.\n",
"\n",
"Next we perform the same calculation in python (numpy) and confirm that the results match."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Assert values are same after running kernel\n",
"hZ = a * hX + hY\n",
"if not np.allclose(hOut, hZ):\n",
" raise ValueError(\"Error outside tolerance for host-device vectors\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(hOut[:10])\n",
"print(hZ[:10])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"err, = cuda.cuStreamDestroy(stream)\n",
"err, = cuda.cuMemFree(dXclass)\n",
"err, = cuda.cuMemFree(dYclass)\n",
"err, = cuda.cuMemFree(dOutclass)\n",
"err, = cuda.cuModuleUnload(module)\n",
"err, = cuda.cuCtxDestroy(context)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "mapals",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
84 changes: 0 additions & 84 deletions docs/notebooks/intro_notebook.ipynb

This file was deleted.

0 comments on commit 7cfb2ca

Please sign in to comment.