
Commit 6f16ea4

make the StagingBuffer implementation more robust
1 parent 347d902 commit 6f16ea4

5 files changed: 89 additions & 62 deletions


wgpu-core/src/device/global.rs

Lines changed: 4 additions & 7 deletions
@@ -2519,20 +2519,17 @@ impl Global {
         }
         let map_state = &*buffer.map_state.lock();
         match *map_state {
-            resource::BufferMapState::Init { ref ptr, .. } => {
+            resource::BufferMapState::Init { ref staging_buffer } => {
                 // offset (u64) can not be < 0, so no need to validate the lower bound
                 if offset + range_size > buffer.size {
                     return Err(BufferAccessError::OutOfBoundsOverrun {
                         index: offset + range_size - 1,
                         max: buffer.size,
                     });
                 }
-                unsafe {
-                    Ok((
-                        NonNull::new_unchecked(ptr.as_ptr().offset(offset as isize)),
-                        range_size,
-                    ))
-                }
+                let ptr = unsafe { staging_buffer.ptr() };
+                let ptr = unsafe { NonNull::new_unchecked(ptr.as_ptr().offset(offset as isize)) };
+                Ok((ptr, range_size))
             }
             resource::BufferMapState::Active {
                 ref ptr, ref range, ..
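The change stops passing the mapped pointer around separately: the `Init` state now holds only the `StagingBuffer`, and the pointer is obtained through the new `unsafe fn ptr()` accessor right where it is needed. A minimal sketch of that pattern, using a hypothetical `MappedAllocation` type (not a wgpu type) to show the bounds check followed by the unchecked offset:

```rust
use std::ptr::NonNull;

/// Hypothetical stand-in for wgpu-core's `StagingBuffer`: the mapped
/// pointer lives inside the owning struct instead of traveling beside it.
struct MappedAllocation {
    ptr: NonNull<u8>,
    len: u64,
}

impl MappedAllocation {
    /// SAFETY: stop using the returned pointer before calling any other
    /// method on `self` (mirrors the contract of `StagingBuffer::ptr`).
    unsafe fn ptr(&self) -> NonNull<u8> {
        self.ptr
    }
}

/// Validate the range first, then do the unchecked pointer arithmetic,
/// in the same order as the `BufferMapState::Init` arm above.
fn map_range(alloc: &MappedAllocation, offset: u64, size: u64) -> Option<NonNull<u8>> {
    if offset + size > alloc.len {
        return None; // corresponds to `BufferAccessError::OutOfBoundsOverrun`
    }
    let ptr = unsafe { alloc.ptr() };
    // SAFETY: `offset` is in bounds, so the result stays inside the
    // allocation and is therefore non-null.
    Some(unsafe { NonNull::new_unchecked(ptr.as_ptr().offset(offset as isize)) })
}
```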

wgpu-core/src/device/queue.rs

Lines changed: 19 additions & 30 deletions
@@ -31,7 +31,7 @@ use smallvec::SmallVec;
 use std::{
     iter,
     mem::{self},
-    ptr::{self, NonNull},
+    ptr::NonNull,
     sync::{atomic::Ordering, Arc},
 };
 use thiserror::Error;
@@ -405,17 +405,13 @@
         // Platform validation requires that the staging buffer always be
         // freed, even if an error occurs. All paths from here must call
         // `device.pending_writes.consume`.
-        let (staging_buffer, staging_buffer_ptr) = StagingBuffer::new(device, data_size)?;
+        let mut staging_buffer = StagingBuffer::new(device, data_size)?;
         let mut pending_writes = device.pending_writes.lock();
         let pending_writes = pending_writes.as_mut().unwrap();
 
-        let staging_buffer = unsafe {
+        let staging_buffer = {
             profiling::scope!("copy");
-            ptr::copy_nonoverlapping(
-                data.as_ptr(),
-                staging_buffer_ptr.as_ptr(),
-                data_size.get() as usize,
-            );
+            staging_buffer.write(data);
             staging_buffer.flush()
         };
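With the copy moved behind `StagingBuffer::write`, this call site no longer needs any `unsafe`, and `flush` drops its `unsafe` marker as well. A self-contained sketch of the resulting create → write → flush lifecycle, using a hypothetical `Vec`-backed mock rather than a real GPU mapping:

```rust
/// Hypothetical Vec-backed mock of the new StagingBuffer surface.
struct MockStaging {
    storage: Vec<u8>,
}

impl MockStaging {
    fn new(size: usize) -> Self {
        Self { storage: vec![0; size] }
    }

    /// Mirrors `StagingBuffer::write`: the source may be longer than the
    /// buffer (callers pass suffixes like `&data[offset..]`), never shorter.
    fn write(&mut self, data: &[u8]) {
        let size = self.storage.len();
        assert!(data.len() >= size);
        self.storage.copy_from_slice(&data[..size]);
    }

    /// Consuming `self` models `flush`: once flushed, no further CPU-side
    /// writes are possible, which is what lets the call sites stay safe.
    fn flush(self) -> Vec<u8> {
        self.storage
    }
}

fn main() {
    let mut staging = MockStaging::new(4);
    staging.write(&[1, 2, 3, 4, 5]);
    assert_eq!(staging.flush(), vec![1, 2, 3, 4]);
}
```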

@@ -448,13 +444,14 @@
 
         let device = &queue.device;
 
-        let (staging_buffer, staging_buffer_ptr) = StagingBuffer::new(device, buffer_size)?;
+        let staging_buffer = StagingBuffer::new(device, buffer_size)?;
+        let ptr = unsafe { staging_buffer.ptr() };
 
         let fid = hub.staging_buffers.prepare(id_in);
         let id = fid.assign(Arc::new(staging_buffer));
         resource_log!("Queue::create_staging_buffer {id:?}");
 
-        Ok((id, staging_buffer_ptr))
+        Ok((id, ptr))
     }
 
     pub fn queue_write_staging_buffer<A: HalApi>(
@@ -487,7 +484,7 @@
         // user. Platform validation requires that the staging buffer always
         // be freed, even if an error occurs. All paths from here must call
         // `device.pending_writes.consume`.
-        let staging_buffer = unsafe { staging_buffer.flush() };
+        let staging_buffer = staging_buffer.flush();
 
         let result = self.queue_write_staging_buffer_impl(
             &queue,
@@ -779,42 +776,34 @@
         // Platform validation requires that the staging buffer always be
         // freed, even if an error occurs. All paths from here must call
         // `device.pending_writes.consume`.
-        let (staging_buffer, staging_buffer_ptr) = StagingBuffer::new(device, stage_size)?;
+        let mut staging_buffer = StagingBuffer::new(device, stage_size)?;
 
         if stage_bytes_per_row == bytes_per_row {
             profiling::scope!("copy aligned");
             // Fast path if the data is already being aligned optimally.
-            unsafe {
-                ptr::copy_nonoverlapping(
-                    data.as_ptr().offset(data_layout.offset as isize),
-                    staging_buffer_ptr.as_ptr(),
-                    stage_size.get() as usize,
-                );
-            }
+            staging_buffer.write(&data[data_layout.offset as usize..]);
         } else {
             profiling::scope!("copy chunked");
             // Copy row by row into the optimal alignment.
             let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize;
             for layer in 0..size.depth_or_array_layers {
                 let rows_offset = layer * block_rows_per_image;
-                for row in 0..height_blocks {
+                for row in rows_offset..rows_offset + height_blocks {
+                    let src_offset = data_layout.offset as u32 + row * bytes_per_row;
+                    let dst_offset = row * stage_bytes_per_row;
                     unsafe {
-                        ptr::copy_nonoverlapping(
-                            data.as_ptr().offset(
-                                data_layout.offset as isize
-                                    + (rows_offset + row) as isize * bytes_per_row as isize,
-                            ),
-                            staging_buffer_ptr.as_ptr().offset(
-                                (rows_offset + row) as isize * stage_bytes_per_row as isize,
-                            ),
+                        staging_buffer.write_with_offset(
+                            data,
+                            src_offset as isize,
+                            dst_offset as isize,
                             copy_bytes_per_row,
-                        );
+                        )
                     }
                 }
             }
         }
 
-        let staging_buffer = unsafe { staging_buffer.flush() };
+        let staging_buffer = staging_buffer.flush();
 
         let regions = (0..array_layer_count).map(|rel_array_layer| {
             let mut texture_base = dst_base.clone();
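The chunked path now computes `src_offset` and `dst_offset` up front and iterates over absolute block rows (`rows_offset..rows_offset + height_blocks`) instead of re-deriving `rows_offset + row` inside the pointer arithmetic. A self-contained sketch of the same row-pitch math over safe slices (names mirror the diff; the sizes in `main` are illustrative):

```rust
/// Copy tightly packed rows into a destination with a padded row pitch,
/// mirroring the "copy chunked" loop above but over safe slices.
fn copy_rows_chunked(
    data: &[u8],
    staging: &mut [u8],
    data_offset: usize,
    bytes_per_row: usize,       // source pitch
    stage_bytes_per_row: usize, // aligned destination pitch
    height_blocks: usize,
    layers: usize,
    block_rows_per_image: usize,
) {
    let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row);
    for layer in 0..layers {
        let rows_offset = layer * block_rows_per_image;
        // `row` is an absolute block-row index, as in the rewritten loop.
        for row in rows_offset..rows_offset + height_blocks {
            let src = data_offset + row * bytes_per_row;
            let dst = row * stage_bytes_per_row;
            staging[dst..dst + copy_bytes_per_row]
                .copy_from_slice(&data[src..src + copy_bytes_per_row]);
        }
    }
}

fn main() {
    // Two rows of 3 bytes, staged with a pitch of 4 (one padding byte per row).
    let data = [1, 2, 3, 4, 5, 6];
    let mut staging = [0u8; 8];
    copy_rows_chunked(&data, &mut staging, 0, 3, 4, 2, 1, 2);
    assert_eq!(staging, [1, 2, 3, 0, 4, 5, 6, 0]);
}
```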

wgpu-core/src/device/resource.rs

Lines changed: 3 additions & 6 deletions
@@ -591,18 +591,15 @@ impl<A: HalApi> Device<A> {
             };
             hal::BufferUses::MAP_WRITE
         } else {
-            let (staging_buffer, staging_buffer_ptr) =
+            let mut staging_buffer =
                 StagingBuffer::new(self, wgt::BufferSize::new(aligned_size).unwrap())?;
 
             // Zero initialize memory and then mark the buffer as initialized
             // (it's guaranteed that this is the case by the time the buffer is usable)
-            unsafe { std::ptr::write_bytes(staging_buffer_ptr.as_ptr(), 0, aligned_size as usize) };
+            staging_buffer.write_zeros();
             buffer.initialization_status.write().drain(0..aligned_size);
 
-            *buffer.map_state.lock() = resource::BufferMapState::Init {
-                staging_buffer,
-                ptr: staging_buffer_ptr,
-            };
+            *buffer.map_state.lock() = resource::BufferMapState::Init { staging_buffer };
             hal::BufferUses::COPY_DST
         };
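`write_zeros` replaces the open-coded `std::ptr::write_bytes`, so the zero-initialization guarantee for buffers mapped at creation now lives next to the pointer it touches. A minimal sketch of the encapsulation, again over a hypothetical simplified type:

```rust
use std::ptr::NonNull;

/// Hypothetical simplified staging buffer that owns its mapped pointer.
struct Staging {
    ptr: NonNull<u8>,
    size: usize,
}

impl Staging {
    /// Zero the entire mapped range, mirroring `StagingBuffer::write_zeros`.
    fn write_zeros(&mut self) {
        // SAFETY: `ptr` points at `size` writable bytes for as long as the
        // mapping is alive, which `&mut self` guarantees here.
        unsafe { std::ptr::write_bytes(self.ptr.as_ptr(), 0, self.size) };
    }
}
```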

wgpu-core/src/resource.rs

Lines changed: 58 additions & 18 deletions
@@ -256,10 +256,7 @@ pub enum BufferMapAsyncStatus {
 #[derive(Debug)]
 pub(crate) enum BufferMapState<A: HalApi> {
     /// Mapped at creation.
-    Init {
-        staging_buffer: StagingBuffer<A>,
-        ptr: NonNull<u8>,
-    },
+    Init { staging_buffer: StagingBuffer<A> },
     /// Waiting for GPU to be done before mapping
     Waiting(BufferPendingMapping<A>),
     /// Mapped
@@ -651,28 +648,22 @@ impl<A: HalApi> Buffer<A> {
         let raw_buf = self.try_raw(&snatch_guard)?;
         log::debug!("{} map state -> Idle", self.error_ident());
         match mem::replace(&mut *self.map_state.lock(), BufferMapState::Idle) {
-            BufferMapState::Init {
-                staging_buffer,
-                ptr,
-            } => {
+            BufferMapState::Init { staging_buffer } => {
                 #[cfg(feature = "trace")]
                 if let Some(ref mut trace) = *device.trace.lock() {
-                    let data = trace.make_binary("bin", unsafe {
-                        std::slice::from_raw_parts(ptr.as_ptr(), self.size as usize)
-                    });
+                    let data = trace.make_binary("bin", staging_buffer.get_data());
                     trace.add(trace::Action::WriteBuffer {
                         id: buffer_id,
                         data,
                         range: 0..self.size,
                         queued: true,
                     });
                 }
-                let _ = ptr;
 
                 let mut pending_writes = device.pending_writes.lock();
                 let pending_writes = pending_writes.as_mut().unwrap();
 
-                let staging_buffer = unsafe { staging_buffer.flush() };
+                let staging_buffer = staging_buffer.flush();
 
                 self.use_at(device.active_submission_index.load(Ordering::Relaxed) + 1);
                 let region = wgt::BufferSize::new(self.size).map(|size| hal::BufferCopy {
@@ -832,6 +823,11 @@ impl<A: HalApi> Drop for DestroyedBuffer<A> {
     }
 }
 
+#[cfg(send_sync)]
+unsafe impl<A: HalApi> Send for StagingBuffer<A> {}
+#[cfg(send_sync)]
+unsafe impl<A: HalApi> Sync for StagingBuffer<A> {}
+
 /// A temporary buffer, consumed by the command that uses it.
 ///
 /// A [`StagingBuffer`] is designed for one-shot uploads of data to the GPU. It
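Storing the `NonNull<u8>` inside `StagingBuffer` makes the struct `!Send`/`!Sync` by default, which is why the commit adds manual `unsafe impl`s under `cfg(send_sync)`. A sketch of the mechanism, with illustrative types:

```rust
use std::ptr::NonNull;

#[allow(dead_code)]
struct OwnsMappedPtr {
    ptr: NonNull<u8>, // raw pointers are neither Send nor Sync...
}

// ...so without these impls the struct cannot cross threads. They are
// justified only when the type has exclusive ownership of the mapping.
unsafe impl Send for OwnsMappedPtr {}
unsafe impl Sync for OwnsMappedPtr {}

fn assert_send_sync<T: Send + Sync>() {}

fn main() {
    // Fails to compile without the unsafe impls above.
    assert_send_sync::<OwnsMappedPtr>();
}
```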
@@ -857,13 +853,11 @@ pub struct StagingBuffer<A: HalApi> {
     device: Arc<Device<A>>,
     pub(crate) size: wgt::BufferSize,
     is_coherent: bool,
+    ptr: NonNull<u8>,
 }
 
 impl<A: HalApi> StagingBuffer<A> {
-    pub(crate) fn new(
-        device: &Arc<Device<A>>,
-        size: wgt::BufferSize,
-    ) -> Result<(Self, NonNull<u8>), DeviceError> {
+    pub(crate) fn new(device: &Arc<Device<A>>, size: wgt::BufferSize) -> Result<Self, DeviceError> {
         use hal::Device;
         profiling::scope!("StagingBuffer::new");
         let stage_desc = hal::BufferDescriptor {
@@ -881,9 +875,55 @@
             device: device.clone(),
             size,
             is_coherent: mapping.is_coherent,
+            ptr: mapping.ptr,
         };
 
-        Ok((staging_buffer, mapping.ptr))
+        Ok(staging_buffer)
+    }
+
+    /// SAFETY: You must not call any functions of `self`
+    /// until you have stopped using the returned pointer.
+    pub(crate) unsafe fn ptr(&self) -> NonNull<u8> {
+        self.ptr
+    }
+
+    #[cfg(feature = "trace")]
+    pub(crate) fn get_data(&self) -> &[u8] {
+        unsafe { std::slice::from_raw_parts(self.ptr.as_ptr(), self.size.get() as usize) }
+    }
+
+    pub(crate) fn write_zeros(&mut self) {
+        unsafe { core::ptr::write_bytes(self.ptr.as_ptr(), 0, self.size.get() as usize) };
+    }
+
+    pub(crate) fn write(&mut self, data: &[u8]) {
+        assert!(data.len() >= self.size.get() as usize);
+        // SAFETY: With the assert above, all of `copy_nonoverlapping`'s
+        // requirements are satisfied.
+        unsafe {
+            core::ptr::copy_nonoverlapping(
+                data.as_ptr(),
+                self.ptr.as_ptr(),
+                self.size.get() as usize,
+            );
+        }
+    }
+
+    /// SAFETY: The offsets and size must be in-bounds.
+    pub(crate) unsafe fn write_with_offset(
+        &mut self,
+        data: &[u8],
+        src_offset: isize,
+        dst_offset: isize,
+        size: usize,
+    ) {
+        unsafe {
+            core::ptr::copy_nonoverlapping(
+                data.as_ptr().offset(src_offset),
+                self.ptr.as_ptr().offset(dst_offset),
+                size,
+            );
+        }
     }
 
     pub(crate) fn flush(self) -> FlushedStagingBuffer<A> {
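`write_with_offset` is the one write that stays `unsafe`: unlike `write`, it only sees raw offsets and cannot check bounds itself. A sketch of the checked front-end a caller could keep on the safe side of that boundary (hypothetical; not part of the commit):

```rust
/// Hypothetical checked front-end for a `write_with_offset`-style copy:
/// both the source and destination ranges must lie inside their buffers.
fn checked_copy(
    data: &[u8],
    staging: &mut [u8],
    src_offset: usize,
    dst_offset: usize,
    size: usize,
) -> Result<(), &'static str> {
    if src_offset.checked_add(size).map_or(true, |end| end > data.len()) {
        return Err("source range out of bounds");
    }
    if dst_offset.checked_add(size).map_or(true, |end| end > staging.len()) {
        return Err("destination range out of bounds");
    }
    staging[dst_offset..dst_offset + size]
        .copy_from_slice(&data[src_offset..src_offset + size]);
    Ok(())
}
```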

wgpu-hal/src/lib.rs

Lines changed: 5 additions & 1 deletion
@@ -714,9 +714,13 @@ pub trait Device: WasmNotSendSync {
     /// be ordered, so it is meaningful to talk about what must occur
     /// "between" them.
     ///
+    /// - Zero-sized mappings are not allowed.
+    ///
+    /// - The returned [`BufferMapping::ptr`] must not be used after a call to
+    ///   [`Device::unmap_buffer`].
+    ///
     /// [`MAP_READ`]: BufferUses::MAP_READ
     /// [`MAP_WRITE`]: BufferUses::MAP_WRITE
-    //TODO: clarify if zero-sized mapping is allowed
     unsafe fn map_buffer(
         &self,
         buffer: &<Self::A as Api>::Buffer,
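The resolved TODO becomes two hard rules: no zero-sized mappings, and no use of the mapped pointer after `unmap_buffer`. wgpu already pushes the first rule toward the type system, since `wgt::BufferSize` (the size type taken by `StagingBuffer::new`) is a `NonZeroU64`; a minimal sketch of rejecting a zero size before it reaches a backend:

```rust
use std::num::NonZeroU64;

/// Reject zero-sized map requests up front, in the spirit of
/// `wgt::BufferSize` being a `NonZeroU64`.
fn validate_map_size(size: u64) -> Result<NonZeroU64, &'static str> {
    NonZeroU64::new(size).ok_or("zero-sized mappings are not allowed")
}

fn main() {
    assert!(validate_map_size(0).is_err());
    assert_eq!(validate_map_size(256).unwrap().get(), 256);
}
```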
