Skip to content

Commit

Permalink
Upload only dirty light clusters regions, PR #418
Browse files Browse the repository at this point in the history
Ends up uploading only a few megabytes per frame, spread across roughly a hundred ranges on average.
Still not great: validation is now too slow. But otherwise it's back to ~60fps.

I think this is the end of the line for this approach. Any further improvement to light clusters would need a complete overhaul, e.g. moving them completely to GPU compute.

Fixes #385
  • Loading branch information
w23 committed Feb 7, 2023
2 parents 18a7c61 + 0573186 commit 3edcb7c
Show file tree
Hide file tree
Showing 7 changed files with 113 additions and 43 deletions.
14 changes: 7 additions & 7 deletions ref_vk/shaders/light.glsl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
layout (set = 0, binding = BINDING_LIGHTS) readonly buffer SBOLights { LightsMetadata m; } lights;
layout (set = 0, binding = BINDING_LIGHT_CLUSTERS, align = 1) readonly buffer UBOLightClusters {
ivec3 grid_min, grid_size;
//ivec3 grid_min, grid_size;
//uint8_t clusters_data[MAX_LIGHT_CLUSTERS * LIGHT_CLUSTER_SIZE + HACK_OFFSET];
LightCluster clusters_[MAX_LIGHT_CLUSTERS];
} light_grid;
Expand All @@ -25,11 +25,11 @@ void computePointLights(vec3 P, vec3 N, uint cluster_index, vec3 throughput, vec
diffuse = specular = vec3(0.);

//diffuse = vec3(1.);//float(lights.m.num_point_lights) / 64.);
//#define USE_CLUSTERS
#define USE_CLUSTERS
#ifdef USE_CLUSTERS
const uint num_point_lights = uint(light_grid.clusters[cluster_index].num_point_lights);
const uint num_point_lights = uint(light_grid.clusters_[cluster_index].num_point_lights);
for (uint j = 0; j < num_point_lights; ++j) {
const uint i = uint(light_grid.clusters[cluster_index].point_lights[j]);
const uint i = uint(light_grid.clusters_[cluster_index].point_lights[j]);
#else
for (uint i = 0; i < lights.m.num_point_lights; ++i) {
#endif
Expand Down Expand Up @@ -116,11 +116,11 @@ void computePointLights(vec3 P, vec3 N, uint cluster_index, vec3 throughput, vec
void computeLighting(vec3 P, vec3 N, vec3 throughput, vec3 view_dir, MaterialProperties material, out vec3 diffuse, out vec3 specular) {
diffuse = specular = vec3(0.);

const ivec3 light_cell = ivec3(floor(P / LIGHT_GRID_CELL_SIZE)) - light_grid.grid_min;
const uint cluster_index = uint(dot(light_cell, ivec3(1, light_grid.grid_size.x, light_grid.grid_size.x * light_grid.grid_size.y)));
const ivec3 light_cell = ivec3(floor(P / LIGHT_GRID_CELL_SIZE)) - lights.m.grid_min_cell;
const uint cluster_index = uint(dot(light_cell, ivec3(1, lights.m.grid_size.x, lights.m.grid_size.x * lights.m.grid_size.y)));

#ifdef USE_CLUSTERS
if (any(greaterThanEqual(light_cell, light_grid.grid_size)) || cluster_index >= MAX_LIGHT_CLUSTERS)
if (any(greaterThanEqual(light_cell, lights.m.grid_size)) || cluster_index >= MAX_LIGHT_CLUSTERS)
return; // throughput * vec3(1., 0., 0.);
#endif

Expand Down
14 changes: 7 additions & 7 deletions ref_vk/shaders/light_polygon.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -191,12 +191,12 @@ void sampleSinglePolygonLight(in vec3 P, in vec3 N, in vec3 view_dir, in SampleC
#if 0
// Sample random one
void sampleEmissiveSurfaces(vec3 P, vec3 N, vec3 throughput, vec3 view_dir, MaterialProperties material, uint cluster_index, inout vec3 diffuse, inout vec3 specular) {
const uint num_polygons = uint(light_grid.clusters[cluster_index].num_polygons);
const uint num_polygons = uint(light_grid.clusters_[cluster_index].num_polygons);

if (num_polygons == 0)
return;

const uint selected = uint(light_grid.clusters[cluster_index].polygons[rand_range(num_polygons)]);
const uint selected = uint(light_grid.clusters_[cluster_index].polygons[rand_range(num_polygons)]);

const PolygonLight poly = lights.m.polygons[selected];
const SampleContext ctx = buildSampleContext(P, N, view_dir);
Expand All @@ -212,11 +212,11 @@ void sampleEmissiveSurfaces(vec3 P, vec3 N, vec3 throughput, vec3 view_dir, Mate
#if DO_ALL_IN_CLUSTER
const SampleContext ctx = buildSampleContext(P, N, view_dir);

//#define USE_CLUSTERS
#define USE_CLUSTERS
#ifdef USE_CLUSTERS
const uint num_polygons = uint(light_grid.clusters[cluster_index].num_polygons);
const uint num_polygons = uint(light_grid.clusters_[cluster_index].num_polygons);
for (uint i = 0; i < num_polygons; ++i) {
const uint index = uint(light_grid.clusters[cluster_index].polygons[i]);
const uint index = uint(light_grid.clusters_[cluster_index].polygons[i]);
#else
for (uint index = 0; index < lights.m.num_polygons; ++index) {
#endif
Expand Down Expand Up @@ -257,7 +257,7 @@ void sampleEmissiveSurfaces(vec3 P, vec3 N, vec3 throughput, vec3 view_dir, Mate

#ifdef USE_CLUSTERS
// TODO move this to pickPolygonLight function
const uint num_polygons = uint(light_grid.clusters[cluster_index].num_polygons);
const uint num_polygons = uint(light_grid.clusters_[cluster_index].num_polygons);
#else
const uint num_polygons = lights.m.num_polygons;
#endif
Expand All @@ -267,7 +267,7 @@ void sampleEmissiveSurfaces(vec3 P, vec3 N, vec3 throughput, vec3 view_dir, Mate
float eps1 = rand01();
for (uint i = 0; i < num_polygons; ++i) {
#ifdef USE_CLUSTERS
const uint index = uint(light_grid.clusters[cluster_index].polygons[i]);
const uint index = uint(light_grid.clusters_[cluster_index].polygons[i]);
#else
const uint index = i;
#endif
Expand Down
5 changes: 5 additions & 0 deletions ref_vk/shaders/ray_interop.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#define vec3 vec3_t
#define vec4 vec4_t
#define mat4 matrix4x4
typedef int ivec3[3];
#define TOKENPASTE(x, y) x ## y
#define TOKENPASTE2(x, y) TOKENPASTE(x, y)
#define PAD(x) float TOKENPASTE2(pad_, __LINE__)[x];
Expand Down Expand Up @@ -111,6 +112,10 @@ struct LightsMetadata {
uint num_polygons;
uint num_point_lights;
PAD(2)
ivec3 grid_min_cell;
PAD(1)
ivec3 grid_size;
PAD(1)
STRUCT PointLight point_lights[MAX_POINT_LIGHTS];
STRUCT PolygonLight polygons[MAX_EMISSIVE_KUSOCHKI];
vec4 polygon_vertices[MAX_EMISSIVE_KUSOCHKI * 7]; // vec3 but aligned
Expand Down
10 changes: 9 additions & 1 deletion ref_vk/vk_framectl.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
#include "vk_staging.h"
#include "vk_commandpool.h"

#include "vk_light.h" // For stats
#include "shaders/ray_interop.h" // stats: struct LightCluster

#include "profiler.h"

#include "eiface.h" // ARRAYSIZE
Expand Down Expand Up @@ -182,8 +185,13 @@ static void updateGamma( void ) {
}
}

// FIXME move this to r print speeds or something like that
// FIXME move this to r_speeds or something like that
static void showProfilingData( void ) {
{
const int dirty = g_lights.stats.dirty_cells;
gEngine.Con_NPrintf(4, "Dirty light cells: %d, size = %dKiB, ranges = %d\n", dirty, (int)(dirty * sizeof(struct LightCluster) / 1024), g_lights.stats.ranges_uploaded);
}

gEngine.Con_NPrintf(5, "Perf scopes:");
for (int i = 0; i < g_aprof.num_scopes; ++i) {
const aprof_scope_t *const scope = g_aprof.scopes + i;
Expand Down
98 changes: 74 additions & 24 deletions ref_vk/vk_light.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,6 @@ typedef struct {
qboolean set;
} vk_emissive_texture_t;

typedef struct {
int min_cell[4], size[3]; // 4th element is padding
struct LightCluster cells[MAX_LIGHT_CLUSTERS];
} vk_ray_shader_light_grid_t;

struct Lights {
struct LightsMetadata metadata;
vk_ray_shader_light_grid_t grid;
};

static struct {
struct {
vk_emissive_texture_t emissive_textures[MAX_TEXTURES];
Expand All @@ -72,6 +62,7 @@ static struct {

bit_array_t visited_cells;

uint32_t frame_sequence;
} g_lights_;

static struct {
Expand All @@ -95,7 +86,9 @@ qboolean VK_LightsInit( void ) {

gEngine.Cmd_AddCommand("vk_lights_dump", debugDumpLights, "Dump all light sources for next frame");

if (!VK_BufferCreate("rt lights buffer", &g_lights_.buffer, sizeof(struct Lights),
const int buffer_size = sizeof(struct LightsMetadata) + sizeof(struct LightCluster) * MAX_LIGHT_CLUSTERS;

if (!VK_BufferCreate("rt lights buffer", &g_lights_.buffer, buffer_size,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) {
// FIXME complain, handle
Expand Down Expand Up @@ -536,6 +529,7 @@ void RT_LightsNewMapBegin( const struct model_s *map ) {
vk_lights_cell_t *const cell = g_lights.cells + i;
cell->num_point_lights = cell->num_static.point_lights = 0;
cell->num_polygons = cell->num_static.polygons = 0;
cell->frame_sequence = g_lights_.frame_sequence;
}
}
}
Expand All @@ -545,6 +539,8 @@ void RT_LightsFrameBegin( void ) {
g_lights_.num_point_lights = g_lights_.num_static.point_lights;
g_lights_.num_polygon_vertices = g_lights_.num_static.polygon_vertices;

g_lights.stats.dirty_cells = 0;

for (int i = 0; i < g_lights.map.grid_cells; ++i) {
vk_lights_cell_t *const cell = g_lights.cells + i;
cell->num_polygons = cell->num_static.polygons;
Expand All @@ -564,6 +560,10 @@ static qboolean addSurfaceLightToCell( int cell_index, int polygon_light_index )
}

cluster->polygons[cluster->num_polygons++] = polygon_light_index;
if (cluster->frame_sequence != g_lights_.frame_sequence) {
++g_lights.stats.dirty_cells;
cluster->frame_sequence = g_lights_.frame_sequence;
}
return true;
}

Expand All @@ -578,6 +578,11 @@ static qboolean addLightToCell( int cell_index, int light_index ) {
}

cluster->point_lights[cluster->num_point_lights++] = light_index;

if (cluster->frame_sequence != g_lights_.frame_sequence) {
++g_lights.stats.dirty_cells;
cluster->frame_sequence = g_lights_.frame_sequence;
}
return true;
}

Expand Down Expand Up @@ -943,6 +948,8 @@ void RT_LightsNewMapEnd( const struct model_s *map ) {
cell->num_static.polygons = cell->num_polygons;
}
}

g_lights.stats.dirty_cells = g_lights.map.grid_cells;
}

qboolean RT_GetEmissiveForTexture( vec3_t out, int texture_id ) {
Expand Down Expand Up @@ -1124,21 +1131,59 @@ int RT_LightAddPolygon(const rt_light_add_polygon_t *addpoly) {
}
}

static void uploadGrid( vk_ray_shader_light_grid_t *grid ) {
ASSERT(g_lights.map.grid_cells <= MAX_LIGHT_CLUSTERS);
static void uploadGridRange( int begin, int end ) {
const int count = end - begin;
ASSERT( count > 0 );

VectorCopy(g_lights.map.grid_min_cell, grid->min_cell);
VectorCopy(g_lights.map.grid_size, grid->size);
const int size = count * sizeof(struct LightCluster);
const vk_staging_region_t locked = R_VkStagingLockForBuffer( (vk_staging_buffer_args_t) {
.buffer = g_lights_.buffer.buffer,
.offset = sizeof(struct LightsMetadata) + begin * sizeof(struct LightCluster),
.size = size,
.alignment = 16, // WHY?
} );

for (int i = 0; i < g_lights.map.grid_cells; ++i) {
const vk_lights_cell_t *const src = g_lights.cells + i;
struct LightCluster *const dst = grid->cells + i;
ASSERT(locked.ptr);

struct LightCluster *const grid = locked.ptr;
memset(grid, 0, size);

for (int i = 0; i < count; ++i) {
const vk_lights_cell_t *const src = g_lights.cells + i + begin;
struct LightCluster *const dst = grid + i;

dst->num_point_lights = src->num_point_lights;
dst->num_polygons = src->num_polygons;
memcpy(dst->point_lights, src->point_lights, sizeof(uint8_t) * src->num_point_lights);
memcpy(dst->polygons, src->polygons, sizeof(uint8_t) * src->num_polygons);
}

R_VkStagingUnlock( locked.handle );

g_lights.stats.ranges_uploaded++;
}

// Uploads the light grid cells whose copy in the lights buffer is out of date.
// Consecutive out-of-date cells are merged into one contiguous range, and each
// range is sent with a single uploadGridRange() call.
//
// A cell is considered out of date when its frame_sequence stamp matches:
// - the current sequence: something was added to the cell this frame, or
// - the previous sequence: the cell was modified and uploaded last frame. Its
//   counts are reset back to the static ones at frame begin, but nothing
//   re-stamps it, so without this check the buffer would keep last frame's
//   light indices for that cell. Uploading it once more restores its
//   static-only contents.
//
// Resets g_lights.stats.ranges_uploaded; uploadGridRange() increments it once
// per uploaded range.
// NOTE(review): g_lights.stats.dirty_cells only counts cells touched this
// frame, so it can be lower than the number of cells actually uploaded here.
static void uploadGrid( void ) {
	ASSERT(g_lights.map.grid_cells <= MAX_LIGHT_CLUSTERS);

	g_lights.stats.ranges_uploaded = 0;

	const uint32_t current_sequence = g_lights_.frame_sequence;
	// Unsigned arithmetic: wraps around to UINT32_MAX when current_sequence is 0.
	const uint32_t previous_sequence = current_sequence - 1;

	// Index of the first cell of the currently open range, or -1 when no range is open.
	int begin = -1;
	for (int i = 0; i < g_lights.map.grid_cells; ++i) {
		const vk_lights_cell_t *const cell = g_lights.cells + i;

		const qboolean dirty =
			cell->frame_sequence == current_sequence ||
			cell->frame_sequence == previous_sequence;

		if (dirty) {
			// Open a new range unless one is already being accumulated.
			if (begin < 0)
				begin = i;
		} else if (begin >= 0) {
			// First clean cell after a dirty run: flush [begin, i).
			uploadGridRange(begin, i);
			begin = -1;
		}
	}

	// Flush a range that extends up to the last grid cell.
	if (begin >= 0)
		uploadGridRange(begin, g_lights.map.grid_cells);
}

static void uploadPolygonLights( struct LightsMetadata *metadata ) {
Expand Down Expand Up @@ -1189,26 +1234,31 @@ static void uploadPointLights( struct LightsMetadata *metadata ) {
}
}

vk_lights_bindings_t VK_LightsUpload( VkCommandBuffer cmdbuf ) {

vk_lights_bindings_t VK_LightsUpload( void ) {
const vk_staging_region_t locked = R_VkStagingLockForBuffer( (vk_staging_buffer_args_t) {
.buffer = g_lights_.buffer.buffer,
.offset = 0,
.size = sizeof(struct LightsMetadata),
.alignment = 16,
.alignment = 16, // WHY?
} );

ASSERT(locked.ptr);

struct LightsMetadata *metadata = locked.ptr;
memset(metadata, 0, sizeof(*metadata));

VectorCopy(g_lights.map.grid_min_cell, metadata->grid_min_cell);
VectorCopy(g_lights.map.grid_size, metadata->grid_size);

uploadPolygonLights( metadata );
uploadPointLights( metadata );

// FIXME uploadGrid( &lights->grid );

R_VkStagingUnlock( locked.handle );

uploadGrid();

g_lights_.frame_sequence++;

return (vk_lights_bindings_t){
.buffer = g_lights_.buffer.buffer,
.metadata = {
Expand All @@ -1217,7 +1267,7 @@ vk_lights_bindings_t VK_LightsUpload( VkCommandBuffer cmdbuf ) {
},
.grid = {
.offset = sizeof(struct LightsMetadata),
.size = sizeof(vk_ray_shader_light_grid_t),
.size = sizeof(struct LightCluster) * MAX_LIGHT_CLUSTERS,
},
};
}
Expand Down
9 changes: 8 additions & 1 deletion ref_vk/vk_light.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ typedef struct {
uint8_t point_lights;
uint8_t polygons;
} num_static;

uint32_t frame_sequence;
} vk_lights_cell_t;

typedef struct {
Expand Down Expand Up @@ -57,6 +59,11 @@ typedef struct {
} map;

vk_lights_cell_t cells[MAX_LIGHT_CLUSTERS];

struct {
int dirty_cells;
int ranges_uploaded;
} stats;
} vk_lights_t;

extern vk_lights_t g_lights;
Expand All @@ -77,7 +84,7 @@ typedef struct {
uint32_t offset, size;
} metadata, grid;
} vk_lights_bindings_t;
vk_lights_bindings_t VK_LightsUpload( VkCommandBuffer );
vk_lights_bindings_t VK_LightsUpload( void );

qboolean RT_GetEmissiveForTexture( vec3_t out, int texture_id );

Expand Down
6 changes: 3 additions & 3 deletions ref_vk/vk_rtx.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
X(Buffer, indices) \
X(Buffer, vertices) \
X(Buffer, lights) \
X(Buffer, light_clusters) \
X(Buffer, light_grid) \
X(Texture, textures) \
X(Texture, skybox)

Expand Down Expand Up @@ -219,7 +219,7 @@ static void performTracing(VkCommandBuffer cmdbuf, const perform_tracing_args_t*

// TODO move this to lights
RES_SET_BUFFER(lights, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, args->light_bindings->buffer, args->light_bindings->metadata.offset, args->light_bindings->metadata.size);
RES_SET_BUFFER(light_clusters, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, args->light_bindings->buffer, args->light_bindings->grid.offset, args->light_bindings->grid.size);
RES_SET_BUFFER(light_grid, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, args->light_bindings->buffer, args->light_bindings->grid.offset, args->light_bindings->grid.size);
#undef RES_SET_SBUFFER_FULL
#undef RES_SET_BUFFER

Expand Down Expand Up @@ -547,7 +547,7 @@ void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args)
// FIXME pass these matrices explicitly to let RTX module handle ubo itself

RT_LightsFrameEnd();
const vk_lights_bindings_t light_bindings = VK_LightsUpload(cmdbuf);
const vk_lights_bindings_t light_bindings = VK_LightsUpload();

g_rtx.frame_number++;

Expand Down

0 comments on commit 3edcb7c

Please sign in to comment.