Skip to content

Commit

Permalink
webGPU resources
Browse files Browse the repository at this point in the history
  • Loading branch information
bmschmidt committed Dec 17, 2024
1 parent 7237d17 commit 1e1ae8e
Show file tree
Hide file tree
Showing 3 changed files with 491 additions and 0 deletions.
127 changes: 127 additions & 0 deletions src/webGPU/buffertools.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
import { isTypedArray, type TypedArray } from 'webgpu-utils';
import { BufferSet } from '../regl_rendering';
import { WebGPUBufferLocation } from '../types';
// I track locations on buffers like this.
// We keep track of both size -- the number of meaningful data bytes
// and paddedSize -- the number of bytes including 256-byte padding.

/**
 * An expandable set of GPU buffers that can be subdivided so that more than
 * one variable lives on the same block of memory. Reusing buffers this way
 * can have performance benefits over allocating a separate buffer for each
 * small block used.
 *
 * Copied with alterations from deepscatter.
 *
 * The general usage is to call `set(key, typedArray)`: it asks the base class
 * for a block via `allocate_block`, uploads the bytes through a shared staging
 * buffer, and records where they landed in `store`.
 */
export class WebGPUBufferSet extends BufferSet<GPUBuffer, WebGPUBufferLocation> {
  public device: GPUDevice;
  // A single MAP_WRITE staging buffer shared by every upload.
  private stagingBuffer: GPUBuffer;
  public usage: number;

  // Locations of every named upload, keyed by the name passed to `set`.
  public store: Map<string, WebGPUBufferLocation> = new Map();

  /**
   * @param device The GPU device that buffers are created on.
   * @param buffer_size The number of bytes on each strip of memory that we'll ask for.
   * @param usage GPUBufferUsage flags applied to every buffer created by `_create_buffer`.
   */
  constructor(
    device: GPUDevice,
    buffer_size: number,
    usage: number = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC
  ) {
    super(buffer_size);
    this.device = device;
    this.usage = usage;
    this.generate_new_buffer();
    this.stagingBuffer = device.createBuffer({
      size: buffer_size,
      usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.MAP_WRITE,
      mappedAtCreation: false // saves a little trouble in the passThrough function
    });
  }

  /**
   * Copies the raw bytes of `values` into the staging buffer and then issues a
   * GPU copy of `paddedSize` bytes into `bufferLocation`.
   *
   * The copy is done byte-wise, so any element width and any view offset is
   * preserved exactly; the padding after the data is zeroed so that bytes from
   * an earlier upload are never carried into the destination.
   */
  private async passThroughStagingBuffer(values: TypedArray, bufferLocation: WebGPUBufferLocation) {
    const { buffer, offset, paddedSize } = bufferLocation;
    while (this.stagingBuffer.mapState !== 'unmapped') {
      // Wait in line for a millisecond.
      // Would be better to hold a queue and apply more than one of these at once.
      await new Promise((resolve) => setTimeout(resolve, 1));
    }
    await this.stagingBuffer.mapAsync(GPUMapMode.WRITE, 0, paddedSize);
    const staged = new Uint8Array(this.stagingBuffer.getMappedRange(0, paddedSize));
    staged.set(new Uint8Array(values.buffer, values.byteOffset, values.byteLength));
    staged.fill(0, values.byteLength);
    this.stagingBuffer.unmap();
    const commandEncoder = this.device.createCommandEncoder();
    commandEncoder.copyBufferToBuffer(this.stagingBuffer, 0, buffer, offset, paddedSize);
    this.device.queue.submit([commandEncoder.finish()]);
  }

  /** Records the location `v` under the name `k`. */
  register(k: string, v: WebGPUBufferLocation) {
    this.store.set(k, v);
  }

  /**
   * Uploads `value` to a freshly allocated block and registers it under `key`.
   *
   * Both `size` (the meaningful data bytes) and `paddedSize` (rounded up to a
   * multiple of 256 bytes) are recorded on the stored location.
   *
   * @throws Error if `key` is already registered, or if the padded data would
   *   not fit in the staging buffer.
   */
  async set(key: string, value: TypedArray) {
    if (this.store.has(key)) {
      throw new Error(`Key ${key} already exists in buffer set.`);
    }
    const size = value.byteLength;
    const paddedSize = Math.ceil(size / 256) * 256;
    // Fail before allocating: the staging buffer cannot map more than its own size.
    if (paddedSize > this.stagingBuffer.size) {
      throw new Error(
        `Cannot upload ${size} bytes for key ${key}: exceeds the staging buffer size of ${this.stagingBuffer.size} bytes.`
      );
    }

    const { buffer, offset } = this.allocate_block(paddedSize);

    const description = { buffer, offset, size, paddedSize };
    await this.passThroughStagingBuffer(value, description);
    this.register(key, description);
  }

  /** Creates one unmapped buffer of `buffer_size` bytes with this set's usage flags. */
  _create_buffer() : GPUBuffer {
    return this.device.createBuffer({
      size: this.buffer_size,
      usage: this.usage,
      mappedAtCreation: false
    })
  }

  /** Describes the space from the current pointer to the end of the first buffer. */
  _create_leftover_buffer() : WebGPUBufferLocation {
    return {
      buffer: this.buffers[0],
      offset: this.pointer,
      stride: 4, // meaningless here.
      byte_size: this.buffer_size - this.pointer,
      paddedSize: this.buffer_size - this.pointer
    }
  }
}


/**
 * Creates a disposable singleton buffer holding an exact byte-for-byte copy
 * of `data`.
 *
 * ReadonlyBufferSet ought to provide better performance; but this allows more
 * different buffer sizes and easier destruction.
 *
 * The bytes are copied as raw bytes rather than element-by-element, so float
 * data keeps its bit pattern and arrays with 1- or 2-byte elements fit the
 * buffer. The buffer size is rounded up to a multiple of 4 bytes because a
 * buffer created with `mappedAtCreation: true` must have such a size; any
 * padding bytes are left as provided by the freshly mapped range.
 *
 * @param device The GPU device to create the buffer on.
 * @param data A typed array (only the viewed region is copied) or an ArrayBuffer.
 * @param usage GPUBufferUsage flags for the new buffer.
 * @returns The new buffer, already unmapped and ready for GPU use.
 */
export function createSingletonBuffer(
  device: GPUDevice,
  data: TypedArray | ArrayBuffer,
  usage: number
): GPUBuffer {
  let bytes: Uint8Array;
  if (isTypedArray(data)) {
    const view = data as TypedArray;
    // Respect the view's window onto its backing store (e.g. for subarrays).
    bytes = new Uint8Array(view.buffer, view.byteOffset, view.byteLength);
  } else {
    bytes = new Uint8Array(data as ArrayBuffer);
  }
  const buffer = device.createBuffer({
    size: Math.ceil(bytes.byteLength / 4) * 4,
    usage,
    mappedAtCreation: true
  });
  new Uint8Array(buffer.getMappedRange()).set(bytes);
  buffer.unmap();
  return buffer;
}
170 changes: 170 additions & 0 deletions src/webGPU/forests.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
import { createSingletonBuffer, WebGPUBufferSet } from "./buffertools";
import { StatefulGPU } from "./lib";

// Tunable settings for a TinyForest.
type TinyForestParams = {
// The number of trees in the forest.
nTrees: number;
// Tree depth; each tree holds 2**depth bitmask configurations.
depth: number;
// The number of features to consider at each split.
maxFeatures: number;
// The number of dimensions; feature indices are drawn from [0, D).
D: number;
}

// Values used for any setting the caller does not supply to the TinyForest constructor.
const defaultTinyForestParams : TinyForestParams = {
nTrees: 128,
depth: 8,
maxFeatures: 32,
D: 768,
}

/**
 * GPU-resident state for a small forest of bitmask trees.
 *
 * Holds the forest storage buffer, a lazily created buffer of bootstrap
 * weights, and (work in progress) a compute pipeline for counting.
 */
export class TinyForest extends StatefulGPU {
  params: TinyForestParams;

  private _bootstrapSamples?: GPUBuffer; // On the order of 100 KB
  protected _forests?: GPUBuffer; // On the order of 10 MB.
  // private trainedThrough: number = 0;

  /**
   * @param device The GPU device to allocate on.
   * @param bufferSize Byte size passed to the base class and to the buffer set.
   * @param t Overrides for any of the default forest parameters.
   */
  constructor(
    device: GPUDevice,
    bufferSize = 1024 * 1024 * 256,
    t: Partial<TinyForestParams> = {}
  ) {
    super(device, bufferSize);
    this.params = { ...defaultTinyForestParams, ...t };
    this.initializeForestsToZero();
    this.bufferSet = new WebGPUBufferSet(device, bufferSize);
  }

  /**
   * Builds the compute pipeline with three storage bindings: features,
   * dimensions to check, and output counts.
   *
   * NOTE(review): the shader below is an unfinished stub. It has no `main`
   * entry point, and `u16` / `var<storage, write>` do not look like valid
   * WGSL, so pipeline creation is expected to fail until it is completed.
   * The layout also declares every binding as 'storage' while the shader
   * marks two of them `read` -- confirm which is intended.
   */
  countPipeline(): GPUComputePipeline {
    const { device } = this;
    // const { maxFeatures, nTrees } = this.params
    // const OPTIONS = 2;
    // const countBuffer = device.createBuffer({
    //   size: OPTIONS * maxFeatures * nTrees * 4,
    //   usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
    //   mappedAtCreation: false
    // });

    const layout = device.createBindGroupLayout({
      entries: [
        {
          // features buffer;
          binding: 0,
          visibility: GPUShaderStage.COMPUTE,
          buffer: { type: 'storage' }
        },
        {
          // dims to check array;
          binding: 1,
          visibility: GPUShaderStage.COMPUTE,
          buffer: { type: 'storage' }
        },
        {
          // output count buffer.
          binding: 2,
          visibility: GPUShaderStage.COMPUTE,
          buffer: { type: 'storage' }
        }
      ]
    });

    // const subsetsToCheck = this.chooseNextFeatures();
    const pipelineLayout = device.createPipelineLayout({ bindGroupLayouts: [layout] });

    const shaderModule = device.createShaderModule({ code: `
@group(0) @binding(0) var<storage, read> features: array<u32>;
@group(0) @binding(1) var<storage, read> dimsToCheck: array<u16>;
@group(0) @binding(2) var<storage, write> counts: array<u32>;
@compute @workgroup_size(64)
//TODOD HERE
` });

    return device.createComputePipeline({
      layout: pipelineLayout,
      compute: {
        module: shaderModule,
        entryPoint: 'main'
      }
    });
  }

  /**
   * Picks, for every tree, `maxFeatures` distinct random dimension indices in
   * [0, D), sorted ascending, and uploads them as one Uint16 buffer laid out
   * tree after tree (`maxFeatures` entries per tree).
   *
   * @param n Currently unused.
   * @throws Error if `maxFeatures` exceeds `D`, since that many distinct
   *   indices cannot exist.
   */
  // Presumably the directive below silences the "declared but never used"
  // error while nothing calls this method yet -- confirm before removing.
  //@ts-expect-error foo
  private chooseNextFeatures(n = 32) {
    void n;
    const { maxFeatures, nTrees, D } = this.params;
    if (maxFeatures > D) {
      // Without this guard the sampling loop below would never terminate.
      throw new Error(
        `Cannot choose ${maxFeatures} distinct features from only ${D} dimensions.`
      );
    }
    // One slot per (tree, feature) pair.
    const features = new Uint16Array(maxFeatures * nTrees);
    for (let i = 0; i < nTrees; i++) {
      const chosen = new Set<number>();
      while (chosen.size < maxFeatures) {
        chosen.add(Math.floor(Math.random() * D));
      }
      // Typed-array sort is numeric; a plain Array sort would compare as strings.
      const arr = Uint16Array.from(chosen).sort();
      features.set(arr, i * maxFeatures);
    }
    return createSingletonBuffer(
      this.device,
      features,
      GPUBufferUsage.STORAGE
    );
  }

  /**
   * Allocates the forest buffer with every bit cleared.
   *
   * Each tree is a set of bits. For each of the 2**depth configurations a
   * tree stores two runs of D bits: the first D give the desired outcome for
   * each dimension; the second D say whether the bit in that position is to
   * be considered when checking if the tree fits. Each point will match only
   * one configuration, and part of the inference task is determining which.
   */
  initializeForestsToZero() {
    const { D, depth, nTrees } = this.params;
    // Round up so that a bit count not divisible by 8 still gets a whole byte.
    const treeSizeInBytes = Math.ceil((2 * D * 2 ** depth) / 8);

    const data = new Uint8Array(treeSizeInBytes * nTrees);
    this._forests = createSingletonBuffer(
      this.device,
      data,
      GPUBufferUsage.STORAGE
    );
  }

  /**
   * Bootstrap weights, created on first access and cached afterwards.
   *
   * Rather than actually bootstrap, we generate a single list of 100,000
   * numbers drawn from a poisson distribution. These serve as weights for
   * draws with replacement; to bootstrap any given record batch, we take a
   * sequence of numbers from the buffer with offset i.
   */
  get bootstrapSamples(): GPUBuffer {
    if (!this._bootstrapSamples) {
      const weights = new Uint8Array(100000);
      for (let i = 0; i < weights.length; i++) {
        weights[i] = poissonRandomNumber();
      }
      this._bootstrapSamples = createSingletonBuffer(
        this.device,
        weights,
        GPUBufferUsage.STORAGE
      );
    }
    return this._bootstrapSamples;
  }
}


/**
 * Draws one sample from a Poisson distribution with mean 1 by multiplying
 * uniform random numbers together until the running product drops to 1/e or
 * below; the sample is the number of factors that kept the product above
 * that threshold.
 *
 * @returns A non-negative integer.
 */
function poissonRandomNumber(): number {
  const threshold = 1 / Math.E;
  let product = Math.random();
  let count = 0;

  while (product > threshold) {
    count += 1;
    product *= Math.random();
  }

  return count;
}

Loading

0 comments on commit 1e1ae8e

Please sign in to comment.