Skip to content

Commit

Permalink
Fix fog blur vignette (#212)
Browse files Browse the repository at this point in the history
* Work on fixing the incorrect vignette effect produced by the generic blur method

* Exchange gaussian blur for box blur

* Fix blur shaders
  • Loading branch information
Duttenheim authored Mar 20, 2024
1 parent 389edd1 commit f0e7e29
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 67 deletions.
4 changes: 2 additions & 2 deletions code/render/fog/volumetricfogcontext.cc
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,8 @@ VolumetricFogContext::Create(const Ptr<Frame::FrameScript>& frameScript)
}

blurState.blurShader = CoreGraphics::ShaderGet("shd:system_shaders/blur/blur_2d_rgba16f_cs.fxb");
blurState.blurXProgram = ShaderGetProgram(blurState.blurShader, CoreGraphics::ShaderFeatureMask("Alt0"));
blurState.blurYProgram = ShaderGetProgram(blurState.blurShader, CoreGraphics::ShaderFeatureMask("Alt1"));
blurState.blurXProgram = ShaderGetProgram(blurState.blurShader, CoreGraphics::ShaderFeatureMask("BlurX"));
blurState.blurYProgram = ShaderGetProgram(blurState.blurShader, CoreGraphics::ShaderFeatureMask("BlurY"));
blurState.blurXTable.Resize(CoreGraphics::GetNumBufferedFrames());
blurState.blurYTable.Resize(CoreGraphics::GetNumBufferedFrames());

Expand Down
4 changes: 2 additions & 2 deletions syswork/shaders/vk/blur/blur_2d_rgba16f_cs.fx
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
//------------------------------------------------------------------------------
/**
*/
program BlurX [ string Mask = "Alt0"; ]
program BlurX [ string Mask = "BlurX"; ]
{
ComputeShader = csMainX();
};

program BlurY [ string Mask = "Alt1"; ]
program BlurY [ string Mask = "BlurY"; ]
{
ComputeShader = csMainY();
};
95 changes: 35 additions & 60 deletions syswork/shaders/vk/blur/blur_cs.fxh
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ sampler_state InputSampler
#define GAUSSIAN_KERNEL_SIZE_65 1
#endif

#define BLUR_TILE_WIDTH 256
#define BLUR_TILE_WIDTH (64 - KERNEL_RADIUS * 2)
const uint BlurTileWidth = BLUR_TILE_WIDTH;
#define SHARED_MEM_SIZE (KERNEL_RADIUS + BLUR_TILE_WIDTH + KERNEL_RADIUS)

Expand Down Expand Up @@ -100,21 +100,21 @@ csMainX()
ivec2 size = imageSize(BlurImageX).xy;

// calculate offsets
const uint tileStart = int(gl_WorkGroupID.x) * BLUR_TILE_WIDTH;
const uint tileEnd = tileStart + BLUR_TILE_WIDTH;
const uint apronStart = max(0, int(tileStart) - KERNEL_RADIUS);
const uint apronEnd = tileEnd + KERNEL_RADIUS;

const uint x = apronStart + gl_LocalInvocationID.x;
const uint y = gl_WorkGroupID.y;
const uint z = gl_WorkGroupID.z;

const int tileStart = int(gl_WorkGroupID.x) * BLUR_TILE_WIDTH;
const int tileEnd = tileStart + BLUR_TILE_WIDTH;
const int apronStart = int(tileStart) - KERNEL_RADIUS;
const int apronEnd = tileEnd + KERNEL_RADIUS;

const int x = max(0, min(apronStart + int(gl_LocalInvocationID.x), size.x - 1));
const int y = int(gl_WorkGroupID.y);

// load into workgroup shared memory, this allows us to use the original pixel even though
// we might have replaced it with the result from this thread!
#if IMAGE_IS_ARRAY
SharedMemory[gl_LocalInvocationID.x] = IMAGE_LOAD_SWIZZLE(texelFetch(sampler2DArray(InputImageX, InputSampler), ivec3(x, y, z), 0));
const uint z = gl_WorkGroupID.z;
SharedMemory[gl_LocalInvocationID.x] = IMAGE_LOAD_SWIZZLE(imageFetch2DArray(InputImageX, InputSampler, ivec3(x, y, z), 0));
#else
SharedMemory[gl_LocalInvocationID.x] = IMAGE_LOAD_SWIZZLE(texelFetch(sampler2D(InputImageX, InputSampler), ivec2(x, y), 0));
SharedMemory[gl_LocalInvocationID.x] = IMAGE_LOAD_SWIZZLE(imageFetch2D(InputImageX, InputSampler, ivec2(x, y), 0));
#endif
groupMemoryBarrier();
barrier();
Expand All @@ -127,29 +127,17 @@ csMainX()
IMAGE_LOAD_VEC blurTotal = IMAGE_LOAD_VEC(0);

int i;
uint max;
#pragma unroll
for (i = 0; i < KERNEL_RADIUS * 2 + 1; ++i)
for (i = 0; i <= KERNEL_RADIUS * 2; ++i)
{
// Sample the pre-filtered data with step size = 2 pixels
uint j = uint(i) + gl_LocalInvocationID.x;
IMAGE_LOAD_VEC samp;
if (j >= tileEndClamped)
samp = SharedMemory[max];
else
{
samp = SharedMemory[j];
max = j;
}
float weight = weights[i];
blurTotal += weight * samp;
}

IMAGE_LOAD_VEC color = blurTotal;
int j = max(0, min(int(gl_LocalInvocationID.x) + i, SHARED_MEM_SIZE - 1));
blurTotal += weights[i] * SharedMemory[j];
}

#if IMAGE_IS_ARRAY
imageStore(BlurImageX, ivec3(writePos, y, z), RESULT_TO_VEC4(color));
imageStore(BlurImageX, ivec3(writePos, y, z), RESULT_TO_VEC4(blurTotal));
#else
imageStore(BlurImageX, ivec2(writePos, y), RESULT_TO_VEC4(color));
imageStore(BlurImageX, ivec2(writePos, y), RESULT_TO_VEC4(blurTotal));
#endif
}
}
Expand All @@ -166,21 +154,21 @@ csMainY()
ivec2 size = imageSize(BlurImageY).xy;

// calculate offsets
const uint tileStart = int(gl_WorkGroupID.x) * BLUR_TILE_WIDTH;
const uint tileEnd = tileStart + BLUR_TILE_WIDTH;
const uint apronStart = max(0, int(tileStart) - KERNEL_RADIUS);
const uint apronEnd = tileEnd + KERNEL_RADIUS;
const int tileStart = int(gl_WorkGroupID.x) * BLUR_TILE_WIDTH;
const int tileEnd = tileStart + BLUR_TILE_WIDTH;
const int apronStart = int(tileStart) - KERNEL_RADIUS;
const int apronEnd = tileEnd + KERNEL_RADIUS;

const uint x = gl_WorkGroupID.y;
const uint y = apronStart + gl_LocalInvocationID.x;
const uint z = gl_WorkGroupID.z;
const int x = int(gl_WorkGroupID.y);
const int y = max(0, min(apronStart + int(gl_LocalInvocationID.x), size.y - 1));

// load into workgroup shared memory, this allows us to use the original pixel even though
// we might have replaced it with the result from this thread!
#if IMAGE_IS_ARRAY
SharedMemory[gl_LocalInvocationID.x] = IMAGE_LOAD_SWIZZLE(texelFetch(sampler2DArray(InputImageY, InputSampler), ivec3(x, y, z), 0));
const uint z = gl_WorkGroupID.z;
SharedMemory[gl_LocalInvocationID.x] = IMAGE_LOAD_SWIZZLE(imageFetch2DArray(InputImageY, InputSampler, ivec3(x, y, z), 0));
#else
SharedMemory[gl_LocalInvocationID.x] = IMAGE_LOAD_SWIZZLE(texelFetch(sampler2D(InputImageY, InputSampler), ivec2(x, y), 0));
SharedMemory[gl_LocalInvocationID.x] = IMAGE_LOAD_SWIZZLE(imageFetch2D(InputImageY, InputSampler, ivec2(x, y), 0));
#endif
groupMemoryBarrier();
barrier();
Expand All @@ -192,30 +180,17 @@ csMainY()
{
IMAGE_LOAD_VEC blurTotal = IMAGE_LOAD_VEC(0);

int i;
uint max;
#pragma unroll
for (i = 0; i < KERNEL_RADIUS * 2 + 1; ++i)
for (int i = 0; i <= KERNEL_RADIUS * 2; ++i)
{
// Sample the pre-filtered data with step size = 2 pixels
uint j = uint(i) + gl_LocalInvocationID.x;
IMAGE_LOAD_VEC samp;
if (j >= tileEndClamped)
samp = SharedMemory[max];
else
{
samp = SharedMemory[j];
max = j;
}
float weight = weights[i];
blurTotal += weight * samp;
}

IMAGE_LOAD_VEC color = blurTotal;
int j = max(0, min(int(gl_LocalInvocationID.x) + i, SHARED_MEM_SIZE - 1));
blurTotal += weights[i] * SharedMemory[j];
}

#if IMAGE_IS_ARRAY
imageStore(BlurImageY, ivec3(x, writePos, z), RESULT_TO_VEC4(color));
imageStore(BlurImageY, ivec3(x, writePos, z), RESULT_TO_VEC4(blurTotal));
#else
imageStore(BlurImageY, ivec2(x, writePos), RESULT_TO_VEC4(color));
imageStore(BlurImageY, ivec2(x, writePos), RESULT_TO_VEC4(blurTotal));
#endif
}
}
4 changes: 2 additions & 2 deletions syswork/shaders/vk/hbaoblur_cs.fx
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ csMainX()
// calculate offsets
const uint tileStart = int(gl_WorkGroupID.x) * HBAO_TILE_WIDTH;
const uint tileEnd = tileStart + HBAO_TILE_WIDTH;
const uint apronStart = tileStart - KERNEL_RADIUS;
const uint apronStart = max(0, int(tileStart) - KERNEL_RADIUS);
const uint apronEnd = tileEnd + KERNEL_RADIUS;

const uint x = apronStart + gl_LocalInvocationID.x;
Expand Down Expand Up @@ -133,7 +133,7 @@ csMainY()
// calculate offsets
const uint tileStart = int(gl_WorkGroupID.x) * HBAO_TILE_WIDTH;
const uint tileEnd = tileStart + HBAO_TILE_WIDTH;
const uint apronStart = tileStart - KERNEL_RADIUS;
const uint apronStart = max(0, int(tileStart) - KERNEL_RADIUS);
const uint apronEnd = tileEnd + KERNEL_RADIUS;

const uint x = gl_WorkGroupID.y;
Expand Down
5 changes: 4 additions & 1 deletion syswork/shaders/vk/lib/shared.fxh
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ group(TICK_GROUP) sampler_state LinearSampler { Filter = Linear; AddressU = Cla
#define fetch2D(handle, sampler, uv, lod) texelFetch(sampler2D(Textures2D[handle], sampler), uv, lod)
#define fetch2DMS(handle, sampler, uv, lod) texelFetch(sampler2DMS(Textures2DMS[handle], sampler), uv, lod)
#define fetchCube(handle, sampler, uvw, lod) texelFetch(sampler2DArray(Textures2DArray[handle], sampler), uvw, lod)
#define fetchArray(handle, sampler, uvw, lod) texelFetch(sampler2DArray(Textures2DArray[handle], sampler), uvw, lod)
#define fetch2DArray(handle, sampler, uvw, lod) texelFetch(sampler2DArray(Textures2DArray[handle], sampler), uvw, lod)
#define fetch3D(handle, sampler, uvw, lod) texelFetch(sampler3D(Textures3D[handle], sampler), uvw, lod)
#define fetchStencil(handle, sampler, uv, lod) (floatBitsToUint(texelFetch(sampler2D(Textures2D[handle], sampler), uv, lod).r))

Expand All @@ -68,6 +68,9 @@ group(TICK_GROUP) sampler_state LinearSampler { Filter = Linear; AddressU = Cla

#define query_lod2D(handle, sampler, uv) textureQueryLod(sampler2D(Textures2D[handle], sampler), uv)

#define imageFetch2D(image, sampler, uv, lod) texelFetch(sampler2D(image, sampler), uv, lod)
#define imageFetch2DArray(image, sampler, uv, lod) texelFetch(sampler2DArray(image, sampler), uv, lod)

// these parameters are updated once per application tick
group(TICK_GROUP) shared constant PerTickParams
{
Expand Down

0 comments on commit f0e7e29

Please sign in to comment.