Skip to content

Commit

Permalink
webGPU pipelines
Browse files Browse the repository at this point in the history
  • Loading branch information
bmschmidt committed Dec 18, 2024
1 parent 1e1ae8e commit 5670864
Show file tree
Hide file tree
Showing 5 changed files with 385 additions and 307 deletions.
7 changes: 4 additions & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
},
"homepage": "https://github.com/nomic-ai/deepscatter#readme",
"peerDependencies": {
"apache-arrow": ">=11.0.0"
"apache-arrow": "^17.0.0"
},
"dependencies": {
"d3-array": "^3.2.4",
Expand Down
220 changes: 113 additions & 107 deletions src/webGPU/buffertools.ts
Original file line number Diff line number Diff line change
@@ -1,127 +1,133 @@
import { isTypedArray, type TypedArray } from 'webgpu-utils';
import { BufferSet } from '../regl_rendering';
import { WebGPUBufferLocation } from '../types';
// I track locations on buffers like this.
// We keep track of both size -- the number of meaningful data bytes
// and paddedSize -- the number of bytes including 256-byte padding.

export class WebGPUBufferSet extends BufferSet<GPUBuffer, WebGPUBufferLocation> {
// Copied with alterations from deepscatter
import { Some, TupleMap } from '../utilityFunctions';

// An abstraction creating an expandable set of buffers that can be subdivided
// to put more than one variable on the same
// block of memory. Reusing buffers this way can have performance benefits over allocating
// multiple different buffers for each small block used.
// Unlike in webgl, we keep track of both size -- the number of meaningful data bytes
// and paddedSize -- the number of bytes including 256-byte padding.

// The general purpose here is to call 'allocate_block' that releases a block of memory
// to use in creating a new array to be passed to regl.
export class WebGPUBufferSet extends BufferSet<
GPUBuffer,
WebGPUBufferLocation
> {
public device: GPUDevice;
private stagingBuffer: GPUBuffer;
public usage: number;

public device: GPUDevice;
private stagingBuffer: GPUBuffer;
public usage: number;
public store: TupleMap<string, WebGPUBufferLocation> = new TupleMap();

public store: Map<string, WebGPUBufferLocation> = new Map();
/**
*
* @param regl the Regl context we're using.
* @param buffer_size The number of bytes on each strip of memory that we'll ask for.
*/

/**
*
* @param regl the Regl context we're using.
* @param buffer_size The number of bytes on each strip of memory that we'll ask for.
*/
constructor(
device: GPUDevice,
buffer_size: number,
usage: number = GPUBufferUsage.STORAGE |
GPUBufferUsage.COPY_DST |
GPUBufferUsage.COPY_SRC,
) {
super(buffer_size);
this.device = device;
// Track the ends in case we want to allocate smaller items.
this.usage = usage;
this.generate_new_buffer();
this.stagingBuffer = device.createBuffer({
size: buffer_size,
usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.MAP_WRITE,
mappedAtCreation: false, // saves a little trouble in the passThrough function
});
}

constructor(
device: GPUDevice,
buffer_size: number,
usage: number = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC
) {
super(buffer_size)
this.device = device;
// Track the ends in case we want to allocate smaller items.
this.usage = usage;
this.generate_new_buffer();
this.stagingBuffer = device.createBuffer({
size: buffer_size,
usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.MAP_WRITE,
mappedAtCreation: false // saves a little trouble in the passThrough function
});
}
private async passThroughStagingBuffer(
values: Uint32Array,
bufferLocation: WebGPUBufferLocation,
) {
// WebGPU
const { buffer, offset, paddedSize } = bufferLocation;
while (this.stagingBuffer.mapState !== 'unmapped') {
// Wait in line for a millisecond.
// Would be better to hold a queue and apply more than one of these at once.
await new Promise((resolve) => setTimeout(resolve, 1));
}
await this.stagingBuffer.mapAsync(GPUMapMode.WRITE, 0, paddedSize);
new Uint32Array(
this.stagingBuffer.getMappedRange(0, values.byteLength),
).set(values);
this.stagingBuffer.unmap();
const commandEncoder = this.device.createCommandEncoder();
commandEncoder.copyBufferToBuffer(
this.stagingBuffer,
0,
buffer,
offset,
paddedSize,
);
this.device.queue.submit([commandEncoder.finish()]);
}

private async passThroughStagingBuffer(values: Uint32Array, bufferLocation: WebGPUBufferLocation) {
// WebGPU
const { buffer, offset, paddedSize } = bufferLocation;
while (this.stagingBuffer.mapState !== 'unmapped') {
// Wait in line for a millisecond.
// Would be better to hold a queue and apply more than one of these at once.
await new Promise((resolve) => setTimeout(resolve, 1));
}
await this.stagingBuffer.mapAsync(GPUMapMode.WRITE, 0, paddedSize);
new Uint32Array(this.stagingBuffer.getMappedRange(0, values.byteLength)).set(values);
this.stagingBuffer.unmap();
const commandEncoder = this.device.createCommandEncoder();
commandEncoder.copyBufferToBuffer(this.stagingBuffer, 0, buffer, offset, paddedSize);
this.device.queue.submit([commandEncoder.finish()]);
}
register(k: Some<string>, v: WebGPUBufferLocation) {
this.store.set(k, v);
}

register(k: string, v: WebGPUBufferLocation) {
this.store.set(k, v);
}
async set(key: Some<string>, value: TypedArray) {
if (this.store.has(key)) {
throw new Error(`Key ${key.join(', ')} already exists in buffer set.`);
}
const size = value.byteLength;
const paddedSize = Math.ceil(size / 256) * 256;

async set(key: string, value: TypedArray) {
if (this.store.has(key)) {
throw new Error(`Key ${key} already exists in buffer set.`);
}
const size = value.byteLength;
const paddedSize = Math.ceil(size / 256) * 256;
const { buffer, offset } = this.allocate_block(paddedSize);

const { buffer, offset } = this.allocate_block(paddedSize);
// If it's a typed array, we can just copy it directly.
// cast it to uint32array
const v2 = value;
const data = new Uint32Array(v2.buffer, v2.byteOffset, v2.byteLength / 4);
const description = { buffer, offset, size, paddedSize };
await this.passThroughStagingBuffer(data, description);
this.register(key, description);
}

// If it's a typed array, we can just copy it directly.
// cast it to uint32array
const v2 = value;
const data = new Uint32Array(v2.buffer, v2.byteOffset, v2.byteLength / 4);
const description = { buffer, offset, size, paddedSize };
await this.passThroughStagingBuffer(data, description);
this.register(key, description);
}
_create_buffer(): GPUBuffer {
return this.device.createBuffer({
size: this.buffer_size,
usage: this.usage,
mappedAtCreation: false,
});
}

_create_buffer() : GPUBuffer {
return this.device.createBuffer({
size: this.buffer_size,
usage: this.usage,
mappedAtCreation: false
})
}

_create_leftover_buffer() : WebGPUBufferLocation {
return {
buffer: this.buffers[0],
offset: this.pointer,
stride: 4, // meaningless here.
byte_size: this.buffer_size - this.pointer,
paddedSize: this.buffer_size - this.pointer
}
}
_create_leftover_buffer(): WebGPUBufferLocation {
return {
buffer: this.buffers[0],
offset: this.pointer,
stride: 4, // meaningless here.
byte_size: this.buffer_size - this.pointer,
paddedSize: this.buffer_size - this.pointer,
};
}
}


export function createSingletonBuffer(
device: GPUDevice,
data: Uint32Array | Int32Array | Float32Array | ArrayBuffer,
usage: number
device: GPUDevice,
data: Uint32Array | Int32Array | Float32Array | ArrayBuffer,
usage: number,
): GPUBuffer {
// Creates a disposable singleton buffer.
// ReadonlyBufferSet ought to provide better performance; but
// this allows more different buffer sizes and easier destruction.
const buffer = device.createBuffer({
size: data.byteLength,
usage,
mappedAtCreation: true
});
const mappedRange = buffer.getMappedRange();
if (isTypedArray(data)) {
new Uint32Array(mappedRange).set(data as TypedArray);
} else {
new Uint32Array(mappedRange).set(new Uint32Array(data as ArrayBuffer));
}
buffer.unmap();
return buffer;
// Creates a disposable singleton buffer.
// ReadonlyBufferSet ought to provide better performance; but
// this allows more different buffer sizes and easier destruction.
const buffer = device.createBuffer({
size: data.byteLength,
usage,
mappedAtCreation: true,
});
const mappedRange = buffer.getMappedRange();
if (isTypedArray(data)) {
new Uint32Array(mappedRange).set(data as TypedArray);
} else {
new Uint32Array(mappedRange).set(new Uint32Array(data as ArrayBuffer));
}
buffer.unmap();
return buffer;
}
Loading

0 comments on commit 5670864

Please sign in to comment.