From aef85eb58f09252a4873a7c83c61a78a2f5876a3 Mon Sep 17 00:00:00 2001
From: profan <robinhubner@gmail.com>
Date: Sun, 6 Jun 2021 18:37:31 +0100
Subject: [PATCH] Use wgpu::Queue instead of StagingBelt for buffer and texture writes

---
 examples/clipping.rs  | 20 +++--------------
 examples/depth.rs     | 18 ++-------------
 examples/hello.rs     | 20 +++--------------
 src/lib.rs            | 38 +++++++++++++++----------------
 src/pipeline.rs       | 37 +++++++++---------------------
 src/pipeline/cache.rs | 52 +++++++++++++------------------------------
 6 files changed, 53 insertions(+), 132 deletions(-)
diff --git a/examples/clipping.rs b/examples/clipping.rs
index 18c25fe..1e035e0 100644
--- a/examples/clipping.rs
+++ b/examples/clipping.rs
@@ -16,7 +16,7 @@ fn main() -> Result<(), Box<dyn Error>> {
     let surface = unsafe { instance.create_surface(&window) };
 
     // Initialize GPU
-    let (device, queue) = futures::executor::block_on(async {
+    let (device, mut queue) = futures::executor::block_on(async {
         let adapter = instance
             .request_adapter(&wgpu::RequestAdapterOptions {
                 power_preference: wgpu::PowerPreference::HighPerformance,
@@ -31,11 +31,6 @@ fn main() -> Result<(), Box<dyn Error>> {
             .expect("Request device")
     });
 
-    // Create staging belt and a local pool
-    let mut staging_belt = wgpu::util::StagingBelt::new(1024);
-    let mut local_pool = futures::executor::LocalPool::new();
-    let local_spawner = local_pool.spawner();
-
     // Prepare swap chain
     let render_format = wgpu::TextureFormat::Bgra8UnormSrgb;
     let mut size = window.inner_size();
@@ -139,7 +134,7 @@ fn main() -> Result<(), Box<dyn Error>> {
                 glyph_brush
                     .draw_queued(
                         &device,
-                        &mut staging_belt,
+                        &mut queue,
                         &mut encoder,
                         &frame.view,
                         size.width,
@@ -160,7 +155,7 @@ fn main() -> Result<(), Box<dyn Error>> {
                 glyph_brush
                     .draw_queued_with_transform_and_scissoring(
                         &device,
-                        &mut staging_belt,
+                        &mut queue,
                         &mut encoder,
                         &frame.view,
                         wgpu_glyph::orthographic_projection(
@@ -177,17 +172,8 @@ fn main() -> Result<(), Box<dyn Error>> {
                     .expect("Draw queued");
 
                 // Submit the work!
-                staging_belt.finish();
                 queue.submit(Some(encoder.finish()));
 
-                // Recall unused staging buffers
-                use futures::task::SpawnExt;
-
-                local_spawner
-                    .spawn(staging_belt.recall())
-                    .expect("Recall staging belt");
-
-                local_pool.run_until_stalled();
             }
             _ => {
                 *control_flow = winit::event_loop::ControlFlow::Wait;
diff --git a/examples/depth.rs b/examples/depth.rs
index 4d06214..d165d9d 100644
--- a/examples/depth.rs
+++ b/examples/depth.rs
@@ -18,7 +18,7 @@ fn main() -> Result<(), Box<dyn Error>> {
     let surface = unsafe { instance.create_surface(&window) };
 
     // Initialize GPU
-    let (device, queue) = futures::executor::block_on(async {
+    let (device, mut queue) = futures::executor::block_on(async {
         let adapter = instance
             .request_adapter(&wgpu::RequestAdapterOptions {
                 power_preference: wgpu::PowerPreference::HighPerformance,
@@ -33,11 +33,6 @@ fn main() -> Result<(), Box<dyn Error>> {
             .expect("Request device")
     });
 
-    // Create staging belt and a local pool
-    let mut staging_belt = wgpu::util::StagingBelt::new(1024);
-    let mut local_pool = futures::executor::LocalPool::new();
-    let local_spawner = local_pool.spawner();
-
     // Prepare swap chain and depth buffer
     let mut size = window.inner_size();
     let mut new_size = None;
@@ -158,7 +153,7 @@ fn main() -> Result<(), Box<dyn Error>> {
                 glyph_brush
                     .draw_queued(
                         &device,
-                        &mut staging_belt,
+                        &mut queue,
                         &mut encoder,
                         &frame.view,
                         wgpu::RenderPassDepthStencilAttachment {
@@ -178,17 +173,8 @@ fn main() -> Result<(), Box<dyn Error>> {
                     .expect("Draw queued");
 
                 // Submit the work!
-                staging_belt.finish();
                 queue.submit(Some(encoder.finish()));
 
-                // Recall unused staging buffers
-                use futures::task::SpawnExt;
-
-                local_spawner
-                    .spawn(staging_belt.recall())
-                    .expect("Recall staging belt");
-
-                local_pool.run_until_stalled();
             }
             _ => {
                 *control_flow = winit::event_loop::ControlFlow::Wait;
diff --git a/examples/hello.rs b/examples/hello.rs
index 8a9db43..b5877e9 100644
--- a/examples/hello.rs
+++ b/examples/hello.rs
@@ -16,7 +16,7 @@ fn main() -> Result<(), Box<dyn Error>> {
     let surface = unsafe { instance.create_surface(&window) };
 
     // Initialize GPU
-    let (device, queue) = futures::executor::block_on(async {
+    let (device, mut queue) = futures::executor::block_on(async {
         let adapter = instance
             .request_adapter(&wgpu::RequestAdapterOptions {
                 power_preference: wgpu::PowerPreference::HighPerformance,
@@ -31,11 +31,6 @@ fn main() -> Result<(), Box<dyn Error>> {
             .expect("Request device")
     });
 
-    // Create staging belt and a local pool
-    let mut staging_belt = wgpu::util::StagingBelt::new(1024);
-    let mut local_pool = futures::executor::LocalPool::new();
-    let local_spawner = local_pool.spawner();
-
     // Prepare swap chain
     let render_format = wgpu::TextureFormat::Bgra8UnormSrgb;
     let mut size = window.inner_size();
@@ -148,7 +143,7 @@ fn main() -> Result<(), Box<dyn Error>> {
                 glyph_brush
                     .draw_queued(
                         &device,
-                        &mut staging_belt,
+                        &mut queue,
                         &mut encoder,
                         &frame.view,
                         size.width,
@@ -157,17 +152,8 @@ fn main() -> Result<(), Box<dyn Error>> {
                     .expect("Draw queued");
 
                 // Submit the work!
-                staging_belt.finish();
                 queue.submit(Some(encoder.finish()));
-
-                // Recall unused staging buffers
-                use futures::task::SpawnExt;
-
-                local_spawner
-                    .spawn(staging_belt.recall())
-                    .expect("Recall staging belt");
-
-                local_pool.run_until_stalled();
+
             }
             _ => {
                 *control_flow = winit::event_loop::ControlFlow::Wait;
diff --git a/src/lib.rs b/src/lib.rs
index 9588b7d..10300c1 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -138,7 +138,7 @@ where
     fn process_queued(
         &mut self,
         device: &wgpu::Device,
-        staging_belt: &mut wgpu::util::StagingBelt,
+        queue: &mut wgpu::Queue,
         encoder: &mut wgpu::CommandEncoder,
     ) {
         let pipeline = &mut self.pipeline;
@@ -153,7 +153,7 @@ where
 
                     pipeline.update_cache(
                         device,
-                        staging_belt,
+                        queue,
                         encoder,
                         offset,
                         size,
@@ -201,7 +201,7 @@ where
 
         match brush_action.unwrap() {
             BrushAction::Draw(verts) => {
-                self.pipeline.upload(device, staging_belt, encoder, &verts);
+                self.pipeline.upload(device, queue, encoder, &verts);
             }
             BrushAction::ReDraw => {}
         };
@@ -243,7 +243,7 @@ impl<F: Font + Sync, H: BuildHasher> GlyphBrush<(), F, H> {
     pub fn draw_queued(
         &mut self,
         device: &wgpu::Device,
-        staging_belt: &mut wgpu::util::StagingBelt,
+        queue: &mut wgpu::Queue,
         encoder: &mut wgpu::CommandEncoder,
         target: &wgpu::TextureView,
         target_width: u32,
@@ -251,7 +251,7 @@ impl<F: Font + Sync, H: BuildHasher> GlyphBrush<(), F, H> {
     ) -> Result<(), String> {
         self.draw_queued_with_transform(
             device,
-            staging_belt,
+            queue,
             encoder,
             target,
             orthographic_projection(target_width, target_height),
@@ -273,15 +273,15 @@ impl<F: Font + Sync, H: BuildHasher> GlyphBrush<(), F, H> {
     pub fn draw_queued_with_transform(
         &mut self,
         device: &wgpu::Device,
-        staging_belt: &mut wgpu::util::StagingBelt,
+        queue: &mut wgpu::Queue,
         encoder: &mut wgpu::CommandEncoder,
         target: &wgpu::TextureView,
         transform: [f32; 16],
     ) -> Result<(), String> {
-        self.process_queued(device, staging_belt, encoder);
+        self.process_queued(device, queue, encoder);
         self.pipeline.draw(
             device,
-            staging_belt,
+            queue,
             encoder,
             target,
             transform,
@@ -306,16 +306,16 @@ impl<F: Font + Sync, H: BuildHasher> GlyphBrush<(), F, H> {
     pub fn draw_queued_with_transform_and_scissoring(
         &mut self,
         device: &wgpu::Device,
-        staging_belt: &mut wgpu::util::StagingBelt,
+        queue: &mut wgpu::Queue,
         encoder: &mut wgpu::CommandEncoder,
         target: &wgpu::TextureView,
         transform: [f32; 16],
         region: Region,
     ) -> Result<(), String> {
-        self.process_queued(device, staging_belt, encoder);
+        self.process_queued(device, queue, encoder);
         self.pipeline.draw(
             device,
-            staging_belt,
+            queue,
             encoder,
             target,
             transform,
@@ -363,7 +363,7 @@ impl<F: Font + Sync, H: BuildHasher> GlyphBrush<wgpu::DepthStencilState, F, H> {
     pub fn draw_queued(
         &mut self,
         device: &wgpu::Device,
-        staging_belt: &mut wgpu::util::StagingBelt,
+        queue: &mut wgpu::Queue,
         encoder: &mut wgpu::CommandEncoder,
         target: &wgpu::TextureView,
         depth_stencil_attachment: wgpu::RenderPassDepthStencilAttachment,
@@ -372,7 +372,7 @@ impl<F: Font + Sync, H: BuildHasher> GlyphBrush<wgpu::DepthStencilState, F, H> {
     ) -> Result<(), String> {
         self.draw_queued_with_transform(
             device,
-            staging_belt,
+            queue,
             encoder,
             target,
             depth_stencil_attachment,
@@ -395,16 +395,16 @@ impl<F: Font + Sync, H: BuildHasher> GlyphBrush<wgpu::DepthStencilState, F, H> {
     pub fn draw_queued_with_transform(
         &mut self,
         device: &wgpu::Device,
-        staging_belt: &mut wgpu::util::StagingBelt,
+        queue: &mut wgpu::Queue,
         encoder: &mut wgpu::CommandEncoder,
         target: &wgpu::TextureView,
         depth_stencil_attachment: wgpu::RenderPassDepthStencilAttachment,
         transform: [f32; 16],
     ) -> Result<(), String> {
-        self.process_queued(device, staging_belt, encoder);
+        self.process_queued(device, queue, encoder);
         self.pipeline.draw(
             device,
-            staging_belt,
+            queue,
             encoder,
             target,
             depth_stencil_attachment,
@@ -430,18 +430,18 @@ impl<F: Font + Sync, H: BuildHasher> GlyphBrush<wgpu::DepthStencilState, F, H> {
     pub fn draw_queued_with_transform_and_scissoring(
         &mut self,
         device: &wgpu::Device,
-        staging_belt: &mut wgpu::util::StagingBelt,
+        queue: &mut wgpu::Queue,
         encoder: &mut wgpu::CommandEncoder,
         target: &wgpu::TextureView,
         depth_stencil_attachment: wgpu::RenderPassDepthStencilAttachment,
         transform: [f32; 16],
         region: Region,
     ) -> Result<(), String> {
-        self.process_queued(device, staging_belt, encoder);
+        self.process_queued(device, queue, encoder);
 
         self.pipeline.draw(
             device,
-            staging_belt,
+            queue,
             encoder,
             target,
             depth_stencil_attachment,
diff --git a/src/pipeline.rs b/src/pipeline.rs
index 79cc321..061dec9 100644
--- a/src/pipeline.rs
+++ b/src/pipeline.rs
@@ -44,7 +44,7 @@ impl Pipeline<()> {
     pub fn draw(
         &mut self,
         device: &wgpu::Device,
-        staging_belt: &mut wgpu::util::StagingBelt,
+        queue: &mut wgpu::Queue,
         encoder: &mut wgpu::CommandEncoder,
         target: &wgpu::TextureView,
         transform: [f32; 16],
@@ -53,7 +53,7 @@ impl Pipeline<()> {
         draw(
             self,
             device,
-            staging_belt,
+            queue,
             encoder,
             target,
             None,
@@ -85,7 +85,7 @@ impl Pipeline<wgpu::DepthStencilState> {
     pub fn draw(
         &mut self,
         device: &wgpu::Device,
-        staging_belt: &mut wgpu::util::StagingBelt,
+        queue: &mut wgpu::Queue,
         encoder: &mut wgpu::CommandEncoder,
         target: &wgpu::TextureView,
         depth_stencil_attachment: wgpu::RenderPassDepthStencilAttachment,
@@ -95,7 +95,7 @@ impl Pipeline<wgpu::DepthStencilState> {
         draw(
             self,
             device,
-            staging_belt,
+            queue,
             encoder,
             target,
             Some(depth_stencil_attachment),
@@ -109,14 +109,14 @@ impl<Depth> Pipeline<Depth> {
     pub fn update_cache(
         &mut self,
         device: &wgpu::Device,
-        staging_belt: &mut wgpu::util::StagingBelt,
+        queue: &mut wgpu::Queue,
         encoder: &mut wgpu::CommandEncoder,
         offset: [u16; 2],
         size: [u16; 2],
         data: &[u8],
     ) {
         self.cache
-            .update(device, staging_belt, encoder, offset, size, data);
+            .update(device, queue, encoder, offset, size, data);
     }
 
     pub fn increase_cache_size(
@@ -139,7 +139,7 @@ impl<Depth> Pipeline<Depth> {
     pub fn upload(
         &mut self,
         device: &wgpu::Device,
-        staging_belt: &mut wgpu::util::StagingBelt,
+        queue: &mut wgpu::Queue,
         encoder: &mut wgpu::CommandEncoder,
         instances: &[Instance],
     ) {
@@ -163,15 +163,7 @@ impl<Depth> Pipeline<Depth> {
         let instances_bytes = bytemuck::cast_slice(instances);
 
         if let Some(size) = NonZeroU64::new(instances_bytes.len() as u64) {
-            let mut instances_view = staging_belt.write_buffer(
-                encoder,
-                &self.instances,
-                0,
-                size,
-                device,
-            );
-
-            instances_view.copy_from_slice(instances_bytes);
+            queue.write_buffer(&self.instances, 0, instances_bytes);
         }
 
         self.current_instances = instances.len();
@@ -372,7 +364,7 @@ fn build<D>(
 fn draw<D>(
     pipeline: &mut Pipeline<D>,
     device: &wgpu::Device,
-    staging_belt: &mut wgpu::util::StagingBelt,
+    queue: &mut wgpu::Queue,
     encoder: &mut wgpu::CommandEncoder,
     target: &wgpu::TextureView,
     depth_stencil_attachment: Option<wgpu::RenderPassDepthStencilAttachment>,
@@ -380,16 +372,7 @@ fn draw<D>(
     region: Option<Region>,
 ) {
     if transform != pipeline.current_transform {
-        let mut transform_view = staging_belt.write_buffer(
-            encoder,
-            &pipeline.transform,
-            0,
-            unsafe { NonZeroU64::new_unchecked(16 * 4) },
-            device,
-        );
-
-        transform_view.copy_from_slice(bytemuck::cast_slice(&transform));
-
+        queue.write_buffer(&pipeline.transform, 0, bytemuck::cast_slice(&transform));
         pipeline.current_transform = transform;
     }
 
diff --git a/src/pipeline/cache.rs b/src/pipeline/cache.rs
index 75813dc..fbaebba 100644
--- a/src/pipeline/cache.rs
+++ b/src/pipeline/cache.rs
@@ -1,11 +1,9 @@
-use core::num::NonZeroU64;
 use std::num::NonZeroU32;
 
 pub struct Cache {
     texture: wgpu::Texture,
     pub(super) view: wgpu::TextureView,
-    upload_buffer: wgpu::Buffer,
-    upload_buffer_size: u64,
+    padded_cache: Vec<u8>
 }
 
 impl Cache {
@@ -39,15 +37,14 @@ impl Cache {
         Cache {
             texture,
             view,
-            upload_buffer,
-            upload_buffer_size: Self::INITIAL_UPLOAD_BUFFER_SIZE,
+            padded_cache: Vec::new()
         }
     }
 
     pub fn update(
         &mut self,
         device: &wgpu::Device,
-        staging_belt: &mut wgpu::util::StagingBelt,
+        queue: &mut wgpu::Queue,
         encoder: &mut wgpu::CommandEncoder,
         offset: [u16; 2],
         size: [u16; 2],
@@ -65,43 +62,20 @@ impl Cache {
         let padded_width = width + padded_width_padding;
 
         let padded_data_size = (padded_width * height) as u64;
+        // Size the scratch buffer with `resize` so row padding is always initialized.
 
-        if self.upload_buffer_size < padded_data_size {
-            self.upload_buffer =
-                device.create_buffer(&wgpu::BufferDescriptor {
-                    label: Some("wgpu_glyph::Cache upload buffer"),
-                    size: padded_data_size,
-                    usage: wgpu::BufferUsage::COPY_DST
-                        | wgpu::BufferUsage::COPY_SRC,
-                    mapped_at_creation: false,
-                });
-
-            self.upload_buffer_size = padded_data_size;
+        if self.padded_cache.len() != padded_data_size as usize {
+            self.padded_cache.resize(padded_data_size as usize, 0);
         }
 
-        let mut padded_data = staging_belt.write_buffer(
-            encoder,
-            &self.upload_buffer,
-            0,
-            NonZeroU64::new(padded_data_size).unwrap(),
-            device,
-        );
+        let padded_data = self.padded_cache.as_mut_slice();
 
         for row in 0..height {
             padded_data[row * padded_width..row * padded_width + width]
-                .copy_from_slice(&data[row * width..(row + 1) * width])
+                .copy_from_slice(&data[row * width..(row + 1) * width]);
         }
 
-        // TODO: Move to use Queue for less buffer usage
-        encoder.copy_buffer_to_texture(
-            wgpu::ImageCopyBuffer {
-                buffer: &self.upload_buffer,
-                layout: wgpu::ImageDataLayout {
-                    offset: 0,
-                    bytes_per_row: NonZeroU32::new(padded_width as u32),
-                    rows_per_image: NonZeroU32::new(height as u32),
-                },
-            },
+        queue.write_texture(
             wgpu::ImageCopyTexture {
                 texture: &self.texture,
                 mip_level: 0,
@@ -111,11 +85,17 @@ impl Cache {
                     z: 0,
                 },
             },
+            &padded_data,
+            wgpu::ImageDataLayout {
+                offset: 0,
+                bytes_per_row: NonZeroU32::new(padded_width as u32),
+                rows_per_image: NonZeroU32::new(height as u32),
+            },
             wgpu::Extent3d {
                 width: size[0] as u32,
                 height: size[1] as u32,
                 depth_or_array_layers: 1,
-            },
+            }
         );
     }
 }