Skip to content

Commit

Permalink
Cache redundantly uploaded textures with PF_State
Browse files Browse the repository at this point in the history
Optimizes the case where the source layer is either a static
image or a low-framerate GIF by identifying the uploaded contents of the
image with a PF_State and re-using the already-uploaded image rather than
uploading it again each frame.
  • Loading branch information
Wunkolo committed Mar 17, 2023
1 parent debc048 commit 9277726
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 86 deletions.
2 changes: 2 additions & 0 deletions include/Vulkanator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <AEConfig.h>

#include <AE_Effect.h>
#include <AE_EffectSuites.h>
#include <entry.h>

#include "VulkanConfig.hpp"
Expand Down Expand Up @@ -108,6 +109,7 @@ struct SequenceParams
vk::ImageCreateInfo InputImageInfoCache = {};
vk::ImageCreateInfo OutputImageInfoCache = {};

PF_State InputImageState = {};
vk::UniqueImage InputImage = {};
vk::UniqueDeviceMemory InputImageMemory = {};

Expand Down
208 changes: 122 additions & 86 deletions source/Vulkanator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1283,7 +1283,24 @@ PF_Err SmartRender(
InputImageInfo.sharingMode = vk::SharingMode::eExclusive;
InputImageInfo.initialLayout = vk::ImageLayout::eUndefined;

if( InputImageInfo == SequenceParam->Cache.InputImageInfoCache )
// Get "hash" of current input image
const A_Time CurTime = {in_data->current_time, in_data->time_scale};
const A_Time CurTimeStep = {in_data->time_step, in_data->time_scale};

PF_State CurState = {};
ERR(suites.ParamUtilsSuite3()->PF_GetCurrentState(
in_data->effect_ref, Vulkanator::ParamID::Input, &CurTime, &CurTimeStep,
&CurState
));

// Compare hash of the currently uploaded texture against the cached one
A_Boolean InputImageStateIsSame = false;
ERR(suites.ParamUtilsSuite3()->PF_AreStatesIdentical(
in_data->effect_ref, &SequenceParam->Cache.InputImageState, &CurState,
&InputImageStateIsSame
));

if( InputImageStateIsSame )
{
// Cache Hit
}
Expand All @@ -1300,19 +1317,21 @@ PF_Err SmartRender(
)
.value();
SequenceParam->Cache.InputImageInfoCache = InputImageInfo;
SequenceParam->Cache.InputImageState = CurState;
}

// This provides a mapping between the image contents and the staging buffer
// This provides a mapping between the image contents and the staging
// buffer
const vk::BufferImageCopy InputBufferMapping(
0, std::uint32_t(InputLayer->rowbytes / PixelSize), 0,
vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1),
vk::Offset3D(0, 0, 0),
vk::Extent3D(InputLayer->width, InputLayer->height, 1)
);

// Input image view, this is used to create an interpretation of a certain
// aspect of the image This allows things like having a 2D image array but
// creating a view around just one of the images
// Input image view, this is used to create an interpretation of a
// certain aspect of the image This allows things like having a 2D image
// array but creating a view around just one of the images
vk::ImageViewCreateInfo InputImageViewInfo = {};
// The target image we are making a view of
InputImageViewInfo.image = SequenceParam->Cache.InputImage.get();
Expand All @@ -1324,8 +1343,8 @@ PF_Err SmartRender(
InputImageViewInfo.components.b = vk::ComponentSwizzle::eIdentity;
InputImageViewInfo.components.a = vk::ComponentSwizzle::eIdentity;
InputImageViewInfo.subresourceRange = vk::ImageSubresourceRange(
vk::ImageAspectFlagBits::eColor, // We want the "Color" aspect of the
// image
vk::ImageAspectFlagBits::eColor, // We want the "Color" aspect of
// the image
0, 1, // A single mipmap, mipmap 0
0, 1 // A single image layer, layer 0
);
Expand Down Expand Up @@ -1356,11 +1375,12 @@ PF_Err SmartRender(
OutputImageInfo.samples = vk::SampleCountFlagBits::e1;
OutputImageInfo.tiling = vk::ImageTiling::eOptimal;
OutputImageInfo.usage
= vk::ImageUsageFlagBits::eTransferSrc // Will be transferring from this
// image into the staging buffer
| vk::ImageUsageFlagBits::eColorAttachment; // Will be rendering into
// this image within a
// render pass
= vk::ImageUsageFlagBits::eTransferSrc // Will be transferring from
// this image into the
// staging buffer
| vk::ImageUsageFlagBits::eColorAttachment; // Will be rendering
// into this image
// within a render pass
OutputImageInfo.sharingMode = vk::SharingMode::eExclusive;
OutputImageInfo.initialLayout = vk::ImageLayout::eUndefined;

Expand All @@ -1383,17 +1403,18 @@ PF_Err SmartRender(
SequenceParam->Cache.OutputImageInfoCache = OutputImageInfo;
}

// This provides a mapping between the image contents and the staging buffer
// This provides a mapping between the image contents and the staging
// buffer
const vk::BufferImageCopy OutputBufferMapping(
0, std::uint32_t(OutputLayer->rowbytes / PixelSize), 0,
vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1),
vk::Offset3D(0, 0, 0),
vk::Extent3D(OutputLayer->width, OutputLayer->height, 1)
);

// Output image view, this is used to create an interpretation of a certain
// aspect of the image This allows things like having a 2D image array but
// creating a view around just one of the images
// Output image view, this is used to create an interpretation of a
// certain aspect of the image This allows things like having a 2D image
// array but creating a view around just one of the images
vk::ImageViewCreateInfo OutputImageViewInfo = {};
// The target image we are making a view of
OutputImageViewInfo.image = SequenceParam->Cache.OutputImage.get();
Expand All @@ -1405,8 +1426,8 @@ PF_Err SmartRender(
OutputImageViewInfo.components.b = vk::ComponentSwizzle::eIdentity;
OutputImageViewInfo.components.a = vk::ComponentSwizzle::eIdentity;
OutputImageViewInfo.subresourceRange = vk::ImageSubresourceRange(
vk::ImageAspectFlagBits::eColor, // We want the "Color" aspect of the
// image
vk::ImageAspectFlagBits::eColor, // We want the "Color" aspect of
// the image
0, 1, // A single mipmap, mipmap 0
0, 1 // A single image layer, layer 0
);
Expand Down Expand Up @@ -1462,10 +1483,10 @@ PF_Err SmartRender(
}

// Write combined image+sampler object into the descriptor set
// Here, we combine both the sampler and the image, and we state the format
// that the image will be in by the time this sampler will be in-use, which
// is ideally "shader read only optimal" immediately after we are done
// uploading the texture to the GPU
// Here, we combine both the sampler and the image, and we state the
// format that the image will be in by the time this sampler will be
// in-use, which is ideally "shader read only optimal" immediately after
// we are done uploading the texture to the GPU
vk::DescriptorImageInfo InputImageSamplerWrite(
FrameParam->InputImageSampler.get(), InputImageView.get(),
vk::ImageLayout::eShaderReadOnlyOptimal
Expand All @@ -1485,14 +1506,14 @@ PF_Err SmartRender(
{}
);

// Create Render pass Framebuffer, this maps the Output buffer as a color
// attachment for a Renderpass to render into You can add more attachments
// of different formats, but they must all have the same width,height,layers
// Framebuffers will define the image data that render passes will be able
// to address in total
// Create Render pass Framebuffer, this maps the Output buffer as a
// color attachment for a Renderpass to render into You can add more
// attachments of different formats, but they must all have the same
// width,height,layers Framebuffers will define the image data that
// render passes will be able to address in total
vk::FramebufferCreateInfo OutputFramebufferInfo = {};
// This is for the framebuffer to know what ~~~compatible~~~ renderpasses
// will be rendered into it
// This is for the framebuffer to know what ~~~compatible~~~
// renderpasses will be rendered into it
// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/vkspec.html#renderpass-compatibility
OutputFramebufferInfo.renderPass
= GlobalParam->RenderPasses[FrameParam->Uniforms.Depth].get();
Expand All @@ -1518,8 +1539,8 @@ PF_Err SmartRender(
return PF_Err_INTERNAL_STRUCT_DAMAGED;
}

// Copy Input image data into staging buffer, but keep it mapped, as we will
// read the output image data from it later too
// Copy Input image data into staging buffer, but keep it mapped, as we
// will read the output image data from it later too
void* StagingBufferMapping = nullptr;

if( auto MapResult = GlobalParam->Device->mapMemory(
Expand All @@ -1535,12 +1556,16 @@ PF_Err SmartRender(
return PF_Err_INTERNAL_STRUCT_DAMAGED;
}

// Copy into staging buffer
std::memcpy(
StagingBufferMapping, InputLayer->data,
InputLayer->rowbytes * InputLayer->height
);
if( !InputImageStateIsSame )
{
// Copy Input Image into staging buffer
std::memcpy(
StagingBufferMapping, InputLayer->data,
InputLayer->rowbytes * InputLayer->height
);
}

// Upload uniform data
if( auto MapResult = GlobalParam->Device->mapMemory(
SequenceParam->UniformBufferMemory.get(), 0, VK_WHOLE_SIZE
);
Expand Down Expand Up @@ -1581,58 +1606,69 @@ PF_Err SmartRender(
{
////// Upload staging buffer into Input Image

// Layout transitions, prepare to copy
// Transfer buffers into images
Cmd.pipelineBarrier(
vk::PipelineStageFlagBits::eHost,
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {},
{// Get staging buffer ready for a read
vk::BufferMemoryBarrier(
vk::AccessFlags(), vk::AccessFlagBits::eTransferRead,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
SequenceParam->Cache.StagingBuffer.get(), 0u, VK_WHOLE_SIZE
)},
{
// Get Input Image ready to be written to
vk::ImageMemoryBarrier(
vk::AccessFlags(), vk::AccessFlagBits::eTransferWrite,
vk::ImageLayout::eUndefined,
vk::ImageLayout::eTransferDstOptimal,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
SequenceParam->Cache.InputImage.get(),
vk::ImageSubresourceRange(
vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1
)
),
}
);
if( !InputImageStateIsSame )
{
// Layout transitions, prepare to copy
// Transfer buffers into images
Cmd.pipelineBarrier(
vk::PipelineStageFlagBits::eHost,
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {},
{// Get staging buffer ready for a read
vk::BufferMemoryBarrier(
vk::AccessFlags(), vk::AccessFlagBits::eTransferRead,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
SequenceParam->Cache.StagingBuffer.get(), 0u, VK_WHOLE_SIZE
)},
{
// Get Input Image ready to be written to
vk::ImageMemoryBarrier(
vk::AccessFlags(), vk::AccessFlagBits::eTransferWrite,
vk::ImageLayout::eUndefined,
vk::ImageLayout::eTransferDstOptimal,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
SequenceParam->Cache.InputImage.get(),
vk::ImageSubresourceRange(
vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1
)
),
}
);

// Upload input image data from staging buffer into Input Image
Cmd.copyBufferToImage(
SequenceParam->Cache.StagingBuffer.get(),
SequenceParam->Cache.InputImage.get(),
vk::ImageLayout::eTransferDstOptimal, {InputBufferMapping}
);
// Upload input image data from staging buffer into Input Image
Cmd.copyBufferToImage(
SequenceParam->Cache.StagingBuffer.get(),
SequenceParam->Cache.InputImage.get(),
vk::ImageLayout::eTransferDstOptimal, {InputBufferMapping}
);

// Layout transitions, copy is complete, ready input image to be sampled
// from
// Layout transitions, copy is complete, ready input image to be
// sampled from
Cmd.pipelineBarrier(
vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eComputeShader,
vk::DependencyFlags(), {}, {},
{// Input Image is going to be read
vk::ImageMemoryBarrier(
vk::AccessFlagBits::eTransferWrite,
vk::AccessFlagBits::eShaderRead,
vk::ImageLayout::eTransferDstOptimal,
vk::ImageLayout::eShaderReadOnlyOptimal,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
SequenceParam->Cache.InputImage.get(),
vk::ImageSubresourceRange(
vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1
)
)}
);
}

// Layout transitions, copy is complete, ready input image to be
// sampled from
Cmd.pipelineBarrier(
vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eComputeShader, vk::DependencyFlags(),
{}, {},
{// Input Image is going to be read
vk::ImageMemoryBarrier(
vk::AccessFlagBits::eTransferWrite,
vk::AccessFlagBits::eShaderRead,
vk::ImageLayout::eTransferDstOptimal,
vk::ImageLayout::eShaderReadOnlyOptimal,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
SequenceParam->Cache.InputImage.get(),
vk::ImageSubresourceRange(
vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1
)
),
// Output Image is going to be written to as a color attachment
{// Output Image is going to be written to as a color attachment
// within a render pass
vk::ImageMemoryBarrier(
vk::AccessFlags(), vk::AccessFlagBits::eShaderWrite,
Expand All @@ -1658,10 +1694,10 @@ PF_Err SmartRender(
BeginInfo.framebuffer = OutputFramebuffer.get();

// Rectangular region of the output buffer to render into
// TODO: we could potentially have a cached layer-sized output image,
// and only render into a subset of this image using extent_hint if we
// wanted to. But we use the exact output size for more immediate memory
// savings
// TODO: we could potentially have a cached layer-sized output
// image, and only render into a subset of this image using
// extent_hint if we wanted to. But we use the exact output size for
// more immediate memory savings
BeginInfo.renderArea.offset.x = BeginInfo.renderArea.offset.y = 0;
BeginInfo.renderArea.extent.width = std::uint32_t(OutputLayer->width);
BeginInfo.renderArea.extent.height = std::uint32_t(OutputLayer->height);
Expand Down

0 comments on commit 9277726

Please sign in to comment.