Wunkolo · Wunkolo · Mar 16, 2023 · Sep 13, 2024
diff --git a/include/Vulkanator.hpp b/include/Vulkanator.hpp
@@ -8,6 +8,7 @@
 #include <AEConfig.h>
 
 #include <AE_Effect.h>
+#include <AE_EffectSuites.h>
 #include <entry.h>
 
 #include "VulkanConfig.hpp"
@@ -103,11 +104,13 @@ struct SequenceParams
 		vk::UniqueBuffer       StagingBuffer       = {};
 		vk::UniqueDeviceMemory StagingBufferMemory = {};
 
-		// We use these structs so that we can easily "==" compare the image in
-		// the cache with any new requests coming in
-		vk::ImageCreateInfo InputImageInfoCache  = {};
+		// We use ImageCreateInfo so that we can easily "==" compare the image
+		// in the cache with the image being rendered for the current frame
+		// in the case that we can re-use the memory directly rather than
+		// allocating a new buffer
 		vk::ImageCreateInfo OutputImageInfoCache = {};
 
+		PF_State               InputImageState  = {};
 		vk::UniqueImage        InputImage       = {};
 		vk::UniqueDeviceMemory InputImageMemory = {};
 

diff --git a/source/Vulkanator.cpp b/source/Vulkanator.cpp
@@ -1277,13 +1277,32 @@ PF_Err SmartRender(
 	InputImageInfo.tiling      = vk::ImageTiling::eOptimal;
 	InputImageInfo.usage
 		= vk::ImageUsageFlagBits::eTransferSrc
-		| vk::ImageUsageFlagBits::eTransferDst // Will be trasnferring from the
+		| vk::ImageUsageFlagBits::eTransferDst // Will be transferring from the
 											   // staging buffer into this one
 		| vk::ImageUsageFlagBits::eSampled; // Will be sampling from this image
 	InputImageInfo.sharingMode   = vk::SharingMode::eExclusive;
 	InputImageInfo.initialLayout = vk::ImageLayout::eUndefined;
 
-	if( InputImageInfo == SequenceParam->Cache.InputImageInfoCache )
+	// Get "hash" of current input image
+	const A_Time CurTime     = {in_data->current_time, in_data->time_scale};
+	const A_Time CurTimeStep = {in_data->time_step, in_data->time_scale};
+
+	PF_State CurState = {};
+	ERR(suites.ParamUtilsSuite3()->PF_GetCurrentState(
+		in_data->effect_ref, Vulkanator::ParamID::Input, &CurTime, &CurTimeStep,
+		&CurState
+	));
+
+	// Compare hash of the currently uploaded texture against the cached one
+	// This primarily helps avoid redundantly uploading the input texture in
+	// the case of still-images and hold-frames
+	A_Boolean InputImageStateIsSame = false;
+	ERR(suites.ParamUtilsSuite3()->PF_AreStatesIdentical(
+		in_data->effect_ref, &SequenceParam->Cache.InputImageState, &CurState,
+		&InputImageStateIsSame
+	));
+
+	if( InputImageStateIsSame )
 	{
 		// Cache Hit
 	}
@@ -1299,20 +1318,21 @@ PF_Err SmartRender(
 				  InputImageInfo, vk::MemoryPropertyFlagBits::eDeviceLocal
 			)
 				  .value();
-		SequenceParam->Cache.InputImageInfoCache = InputImageInfo;
+		SequenceParam->Cache.InputImageState = CurState;
 	}
 
-	// This provides a mapping between the image contents and the staging buffer
+	// This provides a mapping between the image contents and the staging
+	// buffer
 	const vk::BufferImageCopy InputBufferMapping(
 		0, std::uint32_t(InputLayer->rowbytes / PixelSize), 0,
 		vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1),
 		vk::Offset3D(0, 0, 0),
 		vk::Extent3D(InputLayer->width, InputLayer->height, 1)
 	);
 
-	// Input image view, this is used to create an interpretation of a certain
-	// aspect of the image This allows things like having a 2D image array but
-	// creating a view around just one of the images
+	// Input image view, this is used to create an interpretation of a
+	// certain aspect of the image. This allows things like having a 2D image
+	// array but creating a view around just one of the images
 	vk::ImageViewCreateInfo InputImageViewInfo = {};
 	// The target image we are making a view of
 	InputImageViewInfo.image    = SequenceParam->Cache.InputImage.get();
@@ -1324,14 +1344,12 @@ PF_Err SmartRender(
 	InputImageViewInfo.components.b     = vk::ComponentSwizzle::eIdentity;
 	InputImageViewInfo.components.a     = vk::ComponentSwizzle::eIdentity;
 	InputImageViewInfo.subresourceRange = vk::ImageSubresourceRange(
-		vk::ImageAspectFlagBits::eColor, // We want the "Color" aspect of the
-										 // image
+		vk::ImageAspectFlagBits::eColor, // We want the "Color" aspect of
+										 // the image
 		0, 1,                            // A single mipmap, mipmap 0
 		0, 1                             // A single image layer, layer 0
 	);
 
-	SequenceParam->Cache.InputImageInfoCache = InputImageInfo;
-
 	vk::UniqueImageView InputImageView = {};
 	if( auto ImageViewResult
 		= GlobalParam->Device->createImageViewUnique(InputImageViewInfo);
@@ -1356,11 +1374,12 @@ PF_Err SmartRender(
 	OutputImageInfo.samples     = vk::SampleCountFlagBits::e1;
 	OutputImageInfo.tiling      = vk::ImageTiling::eOptimal;
 	OutputImageInfo.usage
-		= vk::ImageUsageFlagBits::eTransferSrc // Will be transferring from this
-											   // image into the staging buffer
-		| vk::ImageUsageFlagBits::eColorAttachment; // Will be rendering into
-													// this image within a
-													// render pass
+		= vk::ImageUsageFlagBits::eTransferSrc      // Will be transferring from
+													// this image into the
+													// staging buffer
+		| vk::ImageUsageFlagBits::eColorAttachment; // Will be rendering
+													// into this image
+													// within a render pass
 	OutputImageInfo.sharingMode   = vk::SharingMode::eExclusive;
 	OutputImageInfo.initialLayout = vk::ImageLayout::eUndefined;
 
@@ -1383,17 +1402,18 @@ PF_Err SmartRender(
 		SequenceParam->Cache.OutputImageInfoCache = OutputImageInfo;
 	}
 
-	// This provides a mapping between the image contents and the staging buffer
+	// This provides a mapping between the image contents and the staging
+	// buffer
 	const vk::BufferImageCopy OutputBufferMapping(
 		0, std::uint32_t(OutputLayer->rowbytes / PixelSize), 0,
 		vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1),
 		vk::Offset3D(0, 0, 0),
 		vk::Extent3D(OutputLayer->width, OutputLayer->height, 1)
 	);
 
-	// Output image view, this is used to create an interpretation of a certain
-	// aspect of the image This allows things like having a 2D image array but
-	// creating a view around just one of the images
+	// Output image view, this is used to create an interpretation of a
+	// certain aspect of the image. This allows things like having a 2D image
+	// array but creating a view around just one of the images
 	vk::ImageViewCreateInfo OutputImageViewInfo = {};
 	// The target image we are making a view of
 	OutputImageViewInfo.image    = SequenceParam->Cache.OutputImage.get();
@@ -1405,8 +1425,8 @@ PF_Err SmartRender(
 	OutputImageViewInfo.components.b     = vk::ComponentSwizzle::eIdentity;
 	OutputImageViewInfo.components.a     = vk::ComponentSwizzle::eIdentity;
 	OutputImageViewInfo.subresourceRange = vk::ImageSubresourceRange(
-		vk::ImageAspectFlagBits::eColor, // We want the "Color" aspect of the
-										 // image
+		vk::ImageAspectFlagBits::eColor, // We want the "Color" aspect of
+										 // the image
 		0, 1,                            // A single mipmap, mipmap 0
 		0, 1                             // A single image layer, layer 0
 	);
@@ -1462,10 +1482,10 @@ PF_Err SmartRender(
 	}
 
 	// Write combined image+sampler object into the descriptor set
-	// Here, we combine both the sampler and the image, and we state the format
-	// that the image will be in by the time this sampler will be in-use, which
-	// is ideally "shader read only optimal" immediately after we are done
-	// uploading the texture to the GPU
+	// Here, we combine both the sampler and the image, and we state the
+	// format that the image will be in by the time this sampler will be
+	// in-use, which is ideally "shader read only optimal" immediately after
+	// we are done uploading the texture to the GPU
 	vk::DescriptorImageInfo InputImageSamplerWrite(
 		FrameParam->InputImageSampler.get(), InputImageView.get(),
 		vk::ImageLayout::eShaderReadOnlyOptimal
@@ -1485,14 +1505,14 @@ PF_Err SmartRender(
 		{}
 	);
 
-	// Create Render pass Framebuffer, this maps the Output buffer as a color
-	// attachment for a Renderpass to render into You can add more attachments
-	// of different formats, but they must all have the same width,height,layers
-	// Framebuffers will define the image data that render passes will be able
-	// to address in total
+	// Create Render pass Framebuffer, this maps the Output buffer as a
+	// color attachment for a Renderpass to render into. You can add more
+	// attachments of different formats, but they must all have the same
+	// width,height,layers. Framebuffers will define the image data that
+	// render passes will be able to address in total
 	vk::FramebufferCreateInfo OutputFramebufferInfo = {};
-	// This is for the framebuffer to know what ~~~compatible~~~ renderpasses
-	// will be rendered into it
+	// This is for the framebuffer to know what ~~~compatible~~~
+	// renderpasses will be rendered into it
 	// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/vkspec.html#renderpass-compatibility
 	OutputFramebufferInfo.renderPass
 		= GlobalParam->RenderPasses[FrameParam->Uniforms.Depth].get();
@@ -1518,8 +1538,8 @@ PF_Err SmartRender(
 		return PF_Err_INTERNAL_STRUCT_DAMAGED;
 	}
 
-	// Copy Input image data into staging buffer, but keep it mapped, as we will
-	// read the output image data from it later too
+	// Copy Input image data into staging buffer, but keep it mapped, as we
+	// will read the output image data from it later too
 	void* StagingBufferMapping = nullptr;
 
 	if( auto MapResult = GlobalParam->Device->mapMemory(
@@ -1535,12 +1555,16 @@ PF_Err SmartRender(
 		return PF_Err_INTERNAL_STRUCT_DAMAGED;
 	}
 
-	// Copy into staging buffer
-	std::memcpy(
-		StagingBufferMapping, InputLayer->data,
-		InputLayer->rowbytes * InputLayer->height
-	);
+	if( !InputImageStateIsSame )
+	{
+		// Copy Input Image into staging buffer
+		std::memcpy(
+			StagingBufferMapping, InputLayer->data,
+			InputLayer->rowbytes * InputLayer->height
+		);
+	}
 
+	// Upload uniform data
 	if( auto MapResult = GlobalParam->Device->mapMemory(
 			SequenceParam->UniformBufferMemory.get(), 0, VK_WHOLE_SIZE
 		);
@@ -1581,58 +1605,69 @@ PF_Err SmartRender(
 	{
 		////// Upload staging buffer into Input Image
 
-		// Layout transitions, prepare to copy
-		// Transfer buffers into images
-		Cmd.pipelineBarrier(
-			vk::PipelineStageFlagBits::eHost,
-			vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {},
-			{// Get staging buffer ready for a read
-			 vk::BufferMemoryBarrier(
-				 vk::AccessFlags(), vk::AccessFlagBits::eTransferRead,
-				 VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
-				 SequenceParam->Cache.StagingBuffer.get(), 0u, VK_WHOLE_SIZE
-			 )},
-			{
-				// Get Input Image ready to be written to
-				vk::ImageMemoryBarrier(
-					vk::AccessFlags(), vk::AccessFlagBits::eTransferWrite,
-					vk::ImageLayout::eUndefined,
-					vk::ImageLayout::eTransferDstOptimal,
-					VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
-					SequenceParam->Cache.InputImage.get(),
-					vk::ImageSubresourceRange(
-						vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1
-					)
-				),
-			}
-		);
+		if( !InputImageStateIsSame )
+		{
+			// Layout transitions, prepare to copy
+			// Transfer buffers into images
+			Cmd.pipelineBarrier(
+				vk::PipelineStageFlagBits::eHost,
+				vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {},
+				{// Get staging buffer ready for a read
+				 vk::BufferMemoryBarrier(
+					 vk::AccessFlags(), vk::AccessFlagBits::eTransferRead,
+					 VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
+					 SequenceParam->Cache.StagingBuffer.get(), 0u, VK_WHOLE_SIZE
+				 )},
+				{
+					// Get Input Image ready to be written to
+					vk::ImageMemoryBarrier(
+						vk::AccessFlags(), vk::AccessFlagBits::eTransferWrite,
+						vk::ImageLayout::eUndefined,
+						vk::ImageLayout::eTransferDstOptimal,
+						VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
+						SequenceParam->Cache.InputImage.get(),
+						vk::ImageSubresourceRange(
+							vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1
+						)
+					),
+				}
+			);
 
-		// Upload input image data from staging buffer into Input Image
-		Cmd.copyBufferToImage(
-			SequenceParam->Cache.StagingBuffer.get(),
-			SequenceParam->Cache.InputImage.get(),
-			vk::ImageLayout::eTransferDstOptimal, {InputBufferMapping}
-		);
+			// Upload input image data from staging buffer into Input Image
+			Cmd.copyBufferToImage(
+				SequenceParam->Cache.StagingBuffer.get(),
+				SequenceParam->Cache.InputImage.get(),
+				vk::ImageLayout::eTransferDstOptimal, {InputBufferMapping}
+			);
 
-		// Layout transitions, copy is complete, ready input image to be sampled
-		// from
+			// Layout transitions, copy is complete, ready input image to be
+			// sampled from
+			Cmd.pipelineBarrier(
+				vk::PipelineStageFlagBits::eTransfer,
+				vk::PipelineStageFlagBits::eComputeShader,
+				vk::DependencyFlags(), {}, {},
+				{// Input Image is going to be read
+				 vk::ImageMemoryBarrier(
+					 vk::AccessFlagBits::eTransferWrite,
+					 vk::AccessFlagBits::eShaderRead,
+					 vk::ImageLayout::eTransferDstOptimal,
+					 vk::ImageLayout::eShaderReadOnlyOptimal,
+					 VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
+					 SequenceParam->Cache.InputImage.get(),
+					 vk::ImageSubresourceRange(
+						 vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1
+					 )
+				 )}
+			);
+		}
+
+		// Layout transition, ready the output image to be written to as a
+		// color attachment within a render pass
 		Cmd.pipelineBarrier(
 			vk::PipelineStageFlagBits::eTransfer,
 			vk::PipelineStageFlagBits::eComputeShader, vk::DependencyFlags(),
 			{}, {},
-			{// Input Image is going to be read
-			 vk::ImageMemoryBarrier(
-				 vk::AccessFlagBits::eTransferWrite,
-				 vk::AccessFlagBits::eShaderRead,
-				 vk::ImageLayout::eTransferDstOptimal,
-				 vk::ImageLayout::eShaderReadOnlyOptimal,
-				 VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
-				 SequenceParam->Cache.InputImage.get(),
-				 vk::ImageSubresourceRange(
-					 vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1
-				 )
-			 ),
-			 // Output Image is going to be written to as a color attachment
+			{// Output Image is going to be written to as a color attachment
 			 // within a render pass
 			 vk::ImageMemoryBarrier(
 				 vk::AccessFlags(), vk::AccessFlagBits::eShaderWrite,
@@ -1658,10 +1693,10 @@ PF_Err SmartRender(
 		BeginInfo.framebuffer = OutputFramebuffer.get();
 
 		// Rectangular region of the output buffer to render into
-		// TODO: we could potentially have a cached layer-sized output image,
-		// and only render into a subset of this image using extent_hint if we
-		// wanted to. But we use the exact output size for more immediate memory
-		// savings
+		// TODO: we could potentially have a cached layer-sized output
+		// image, and only render into a subset of this image using
+		// extent_hint if we wanted to. But we use the exact output size for
+		// more immediate memory savings
 		BeginInfo.renderArea.offset.x = BeginInfo.renderArea.offset.y = 0;
 		BeginInfo.renderArea.extent.width  = std::uint32_t(OutputLayer->width);
 		BeginInfo.renderArea.extent.height = std::uint32_t(OutputLayer->height);