Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement cached texture uploads utilizing PF_State #6

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions include/Vulkanator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <AEConfig.h>

#include <AE_Effect.h>
#include <AE_EffectSuites.h>
#include <entry.h>

#include "VulkanConfig.hpp"
Expand Down Expand Up @@ -103,11 +104,13 @@ struct SequenceParams
vk::UniqueBuffer StagingBuffer = {};
vk::UniqueDeviceMemory StagingBufferMemory = {};

// We use these structs so that we can easily "==" compare the image in
// the cache with any new requests coming in
vk::ImageCreateInfo InputImageInfoCache = {};
// We use ImageCreateInfo so that we can easily "==" compare the image
// in the cache with the image being rendered for the current frame,
// so that, when they match, we can re-use the memory directly rather
// than allocating a new buffer
vk::ImageCreateInfo OutputImageInfoCache = {};

PF_State InputImageState = {};
vk::UniqueImage InputImage = {};
vk::UniqueDeviceMemory InputImageMemory = {};

Expand Down
215 changes: 125 additions & 90 deletions source/Vulkanator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1277,13 +1277,32 @@ PF_Err SmartRender(
InputImageInfo.tiling = vk::ImageTiling::eOptimal;
InputImageInfo.usage
= vk::ImageUsageFlagBits::eTransferSrc
| vk::ImageUsageFlagBits::eTransferDst // Will be trasnferring from the
| vk::ImageUsageFlagBits::eTransferDst // Will be transferring from the
// staging buffer into this one
| vk::ImageUsageFlagBits::eSampled; // Will be sampling from this image
InputImageInfo.sharingMode = vk::SharingMode::eExclusive;
InputImageInfo.initialLayout = vk::ImageLayout::eUndefined;

if( InputImageInfo == SequenceParam->Cache.InputImageInfoCache )
// Get "hash" of current input image
const A_Time CurTime = {in_data->current_time, in_data->time_scale};
const A_Time CurTimeStep = {in_data->time_step, in_data->time_scale};

PF_State CurState = {};
ERR(suites.ParamUtilsSuite3()->PF_GetCurrentState(
in_data->effect_ref, Vulkanator::ParamID::Input, &CurTime, &CurTimeStep,
&CurState
));

// Compare hash of the currently uploaded texture against the cached one.
// This primarily helps avoid redundantly uploading the input texture in
// the case of still-images and hold-frames
A_Boolean InputImageStateIsSame = false;
ERR(suites.ParamUtilsSuite3()->PF_AreStatesIdentical(
in_data->effect_ref, &SequenceParam->Cache.InputImageState, &CurState,
&InputImageStateIsSame
));

if( InputImageStateIsSame )
{
// Cache Hit
}
Expand All @@ -1299,20 +1318,21 @@ PF_Err SmartRender(
InputImageInfo, vk::MemoryPropertyFlagBits::eDeviceLocal
)
.value();
SequenceParam->Cache.InputImageInfoCache = InputImageInfo;
SequenceParam->Cache.InputImageState = CurState;
}

// This provides a mapping between the image contents and the staging buffer
// This provides a mapping between the image contents and the staging
// buffer
const vk::BufferImageCopy InputBufferMapping(
0, std::uint32_t(InputLayer->rowbytes / PixelSize), 0,
vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1),
vk::Offset3D(0, 0, 0),
vk::Extent3D(InputLayer->width, InputLayer->height, 1)
);

// Input image view, this is used to create an interpretation of a certain
// aspect of the image This allows things like having a 2D image array but
// creating a view around just one of the images
// Input image view, this is used to create an interpretation of a
// certain aspect of the image. This allows things like having a 2D image
// array but creating a view around just one of the images
vk::ImageViewCreateInfo InputImageViewInfo = {};
// The target image we are making a view of
InputImageViewInfo.image = SequenceParam->Cache.InputImage.get();
Expand All @@ -1324,14 +1344,12 @@ PF_Err SmartRender(
InputImageViewInfo.components.b = vk::ComponentSwizzle::eIdentity;
InputImageViewInfo.components.a = vk::ComponentSwizzle::eIdentity;
InputImageViewInfo.subresourceRange = vk::ImageSubresourceRange(
vk::ImageAspectFlagBits::eColor, // We want the "Color" aspect of the
// image
vk::ImageAspectFlagBits::eColor, // We want the "Color" aspect of
// the image
0, 1, // A single mipmap, mipmap 0
0, 1 // A single image layer, layer 0
);

SequenceParam->Cache.InputImageInfoCache = InputImageInfo;

vk::UniqueImageView InputImageView = {};
if( auto ImageViewResult
= GlobalParam->Device->createImageViewUnique(InputImageViewInfo);
Expand All @@ -1356,11 +1374,12 @@ PF_Err SmartRender(
OutputImageInfo.samples = vk::SampleCountFlagBits::e1;
OutputImageInfo.tiling = vk::ImageTiling::eOptimal;
OutputImageInfo.usage
= vk::ImageUsageFlagBits::eTransferSrc // Will be transferring from this
// image into the staging buffer
| vk::ImageUsageFlagBits::eColorAttachment; // Will be rendering into
// this image within a
// render pass
= vk::ImageUsageFlagBits::eTransferSrc // Will be transferring from
// this image into the
// staging buffer
| vk::ImageUsageFlagBits::eColorAttachment; // Will be rendering
// into this image
// within a render pass
OutputImageInfo.sharingMode = vk::SharingMode::eExclusive;
OutputImageInfo.initialLayout = vk::ImageLayout::eUndefined;

Expand All @@ -1383,17 +1402,18 @@ PF_Err SmartRender(
SequenceParam->Cache.OutputImageInfoCache = OutputImageInfo;
}

// This provides a mapping between the image contents and the staging buffer
// This provides a mapping between the image contents and the staging
// buffer
const vk::BufferImageCopy OutputBufferMapping(
0, std::uint32_t(OutputLayer->rowbytes / PixelSize), 0,
vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1),
vk::Offset3D(0, 0, 0),
vk::Extent3D(OutputLayer->width, OutputLayer->height, 1)
);

// Output image view, this is used to create an interpretation of a certain
// aspect of the image This allows things like having a 2D image array but
// creating a view around just one of the images
// Output image view, this is used to create an interpretation of a
// certain aspect of the image. This allows things like having a 2D image
// array but creating a view around just one of the images
vk::ImageViewCreateInfo OutputImageViewInfo = {};
// The target image we are making a view of
OutputImageViewInfo.image = SequenceParam->Cache.OutputImage.get();
Expand All @@ -1405,8 +1425,8 @@ PF_Err SmartRender(
OutputImageViewInfo.components.b = vk::ComponentSwizzle::eIdentity;
OutputImageViewInfo.components.a = vk::ComponentSwizzle::eIdentity;
OutputImageViewInfo.subresourceRange = vk::ImageSubresourceRange(
vk::ImageAspectFlagBits::eColor, // We want the "Color" aspect of the
// image
vk::ImageAspectFlagBits::eColor, // We want the "Color" aspect of
// the image
0, 1, // A single mipmap, mipmap 0
0, 1 // A single image layer, layer 0
);
Expand Down Expand Up @@ -1462,10 +1482,10 @@ PF_Err SmartRender(
}

// Write combined image+sampler object into the descriptor set
// Here, we combine both the sampler and the image, and we state the format
// that the image will be in by the time this sampler will be in-use, which
// is ideally "shader read only optimal" immediately after we are done
// uploading the texture to the GPU
// Here, we combine both the sampler and the image, and we state the
// layout that the image will be in by the time this sampler will be
// in-use, which is ideally "shader read only optimal" immediately after
// we are done uploading the texture to the GPU
vk::DescriptorImageInfo InputImageSamplerWrite(
FrameParam->InputImageSampler.get(), InputImageView.get(),
vk::ImageLayout::eShaderReadOnlyOptimal
Expand All @@ -1485,14 +1505,14 @@ PF_Err SmartRender(
{}
);

// Create Render pass Framebuffer, this maps the Output buffer as a color
// attachment for a Renderpass to render into You can add more attachments
// of different formats, but they must all have the same width,height,layers
// Framebuffers will define the image data that render passes will be able
// to address in total
// Create Render pass Framebuffer, this maps the Output buffer as a
// color attachment for a Renderpass to render into. You can add more
// attachments of different formats, but they must all have the same
// width,height,layers. Framebuffers will define the image data that
// render passes will be able to address in total
vk::FramebufferCreateInfo OutputFramebufferInfo = {};
// This is for the framebuffer to know what ~~~compatible~~~ renderpasses
// will be rendered into it
// This is for the framebuffer to know what ~~~compatible~~~
// renderpasses will be rendered into it
// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/vkspec.html#renderpass-compatibility
OutputFramebufferInfo.renderPass
= GlobalParam->RenderPasses[FrameParam->Uniforms.Depth].get();
Expand All @@ -1518,8 +1538,8 @@ PF_Err SmartRender(
return PF_Err_INTERNAL_STRUCT_DAMAGED;
}

// Copy Input image data into staging buffer, but keep it mapped, as we will
// read the output image data from it later too
// Copy Input image data into staging buffer, but keep it mapped, as we
// will read the output image data from it later too
void* StagingBufferMapping = nullptr;

if( auto MapResult = GlobalParam->Device->mapMemory(
Expand All @@ -1535,12 +1555,16 @@ PF_Err SmartRender(
return PF_Err_INTERNAL_STRUCT_DAMAGED;
}

// Copy into staging buffer
std::memcpy(
StagingBufferMapping, InputLayer->data,
InputLayer->rowbytes * InputLayer->height
);
if( !InputImageStateIsSame )
{
// Copy Input Image into staging buffer
std::memcpy(
StagingBufferMapping, InputLayer->data,
InputLayer->rowbytes * InputLayer->height
);
}

// Upload uniform data
if( auto MapResult = GlobalParam->Device->mapMemory(
SequenceParam->UniformBufferMemory.get(), 0, VK_WHOLE_SIZE
);
Expand Down Expand Up @@ -1581,58 +1605,69 @@ PF_Err SmartRender(
{
////// Upload staging buffer into Input Image

// Layout transitions, prepare to copy
// Transfer buffers into images
Cmd.pipelineBarrier(
vk::PipelineStageFlagBits::eHost,
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {},
{// Get staging buffer ready for a read
vk::BufferMemoryBarrier(
vk::AccessFlags(), vk::AccessFlagBits::eTransferRead,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
SequenceParam->Cache.StagingBuffer.get(), 0u, VK_WHOLE_SIZE
)},
{
// Get Input Image ready to be written to
vk::ImageMemoryBarrier(
vk::AccessFlags(), vk::AccessFlagBits::eTransferWrite,
vk::ImageLayout::eUndefined,
vk::ImageLayout::eTransferDstOptimal,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
SequenceParam->Cache.InputImage.get(),
vk::ImageSubresourceRange(
vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1
)
),
}
);
if( !InputImageStateIsSame )
{
// Layout transitions, prepare to copy
// Transfer buffers into images
Cmd.pipelineBarrier(
vk::PipelineStageFlagBits::eHost,
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {},
{// Get staging buffer ready for a read
vk::BufferMemoryBarrier(
vk::AccessFlags(), vk::AccessFlagBits::eTransferRead,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
SequenceParam->Cache.StagingBuffer.get(), 0u, VK_WHOLE_SIZE
)},
{
// Get Input Image ready to be written to
vk::ImageMemoryBarrier(
vk::AccessFlags(), vk::AccessFlagBits::eTransferWrite,
vk::ImageLayout::eUndefined,
vk::ImageLayout::eTransferDstOptimal,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
SequenceParam->Cache.InputImage.get(),
vk::ImageSubresourceRange(
vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1
)
),
}
);

// Upload input image data from staging buffer into Input Image
Cmd.copyBufferToImage(
SequenceParam->Cache.StagingBuffer.get(),
SequenceParam->Cache.InputImage.get(),
vk::ImageLayout::eTransferDstOptimal, {InputBufferMapping}
);
// Upload input image data from staging buffer into Input Image
Cmd.copyBufferToImage(
SequenceParam->Cache.StagingBuffer.get(),
SequenceParam->Cache.InputImage.get(),
vk::ImageLayout::eTransferDstOptimal, {InputBufferMapping}
);

// Layout transitions, copy is complete, ready input image to be sampled
// from
// Layout transitions, copy is complete, ready input image to be
// sampled from
Cmd.pipelineBarrier(
vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eComputeShader,
vk::DependencyFlags(), {}, {},
{// Input Image is going to be read
vk::ImageMemoryBarrier(
vk::AccessFlagBits::eTransferWrite,
vk::AccessFlagBits::eShaderRead,
vk::ImageLayout::eTransferDstOptimal,
vk::ImageLayout::eShaderReadOnlyOptimal,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
SequenceParam->Cache.InputImage.get(),
vk::ImageSubresourceRange(
vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1
)
)}
);
}

// Layout transitions, copy is complete, ready input image to be
// sampled from
Cmd.pipelineBarrier(
vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eComputeShader, vk::DependencyFlags(),
{}, {},
{// Input Image is going to be read
vk::ImageMemoryBarrier(
vk::AccessFlagBits::eTransferWrite,
vk::AccessFlagBits::eShaderRead,
vk::ImageLayout::eTransferDstOptimal,
vk::ImageLayout::eShaderReadOnlyOptimal,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
SequenceParam->Cache.InputImage.get(),
vk::ImageSubresourceRange(
vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1
)
),
// Output Image is going to be written to as a color attachment
{// Output Image is going to be written to as a color attachment
// within a render pass
vk::ImageMemoryBarrier(
vk::AccessFlags(), vk::AccessFlagBits::eShaderWrite,
Expand All @@ -1658,10 +1693,10 @@ PF_Err SmartRender(
BeginInfo.framebuffer = OutputFramebuffer.get();

// Rectangular region of the output buffer to render into
// TODO: we could potentially have a cached layer-sized output image,
// and only render into a subset of this image using extent_hint if we
// wanted to. But we use the exact output size for more immediate memory
// savings
// TODO: we could potentially have a cached layer-sized output
// image, and only render into a subset of this image using
// extent_hint if we wanted to. But we use the exact output size for
// more immediate memory savings
BeginInfo.renderArea.offset.x = BeginInfo.renderArea.offset.y = 0;
BeginInfo.renderArea.extent.width = std::uint32_t(OutputLayer->width);
BeginInfo.renderArea.extent.height = std::uint32_t(OutputLayer->height);
Expand Down