From 0a5c7fd15e94ab8f5e8ef7a9ea3c72c005422a7f Mon Sep 17 00:00:00 2001 From: Martin Valigursky Date: Tue, 14 Oct 2025 14:01:03 +0100 Subject: [PATCH 1/6] Add UploadStream API and optimize GSplat order data uploads with non-blocking transfers --- .../graphics/null/null-graphics-device.js | 4 + .../shader-chunks/frag/shared-wgsl.js | 2 + src/platform/graphics/storage-buffer.js | 24 ++- src/platform/graphics/upload-stream.js | 64 ++++++ .../graphics/webgl/webgl-graphics-device.js | 5 + .../graphics/webgl/webgl-upload-stream.js | 182 ++++++++++++++++++ .../graphics/webgpu/webgpu-graphics-device.js | 5 + .../graphics/webgpu/webgpu-upload-stream.js | 162 ++++++++++++++++ src/scene/gsplat-unified/gsplat-manager.js | 5 +- src/scene/gsplat-unified/gsplat-renderer.js | 24 ++- .../gsplat-unified/gsplat-work-buffer.js | 53 +++-- src/scene/gsplat/gsplat-instance.js | 16 +- .../glsl/chunks/gsplat/vert/gsplatSource.js | 7 +- .../wgsl/chunks/gsplat/vert/gsplatSource.js | 22 ++- 14 files changed, 544 insertions(+), 31 deletions(-) create mode 100644 src/platform/graphics/upload-stream.js create mode 100644 src/platform/graphics/webgl/webgl-upload-stream.js create mode 100644 src/platform/graphics/webgpu/webgpu-upload-stream.js diff --git a/src/platform/graphics/null/null-graphics-device.js b/src/platform/graphics/null/null-graphics-device.js index c6cc0a5e13b..dd935dde275 100644 --- a/src/platform/graphics/null/null-graphics-device.js +++ b/src/platform/graphics/null/null-graphics-device.js @@ -101,6 +101,10 @@ class NullGraphicsDevice extends GraphicsDevice { return new NullDrawCommands(); } + createUploadStreamImpl(uploadStream) { + return null; + } + draw(primitive, indexBuffer, numInstances, drawCommands, first = true, last = true) { } diff --git a/src/platform/graphics/shader-chunks/frag/shared-wgsl.js b/src/platform/graphics/shader-chunks/frag/shared-wgsl.js index 8670a0ee8e2..42052fcf993 100644 --- a/src/platform/graphics/shader-chunks/frag/shared-wgsl.js +++ b/src/platform/graphics/shader-chunks/frag/shared-wgsl.js @@ -1,5 +1,7 @@ export default /* glsl */` +#define WEBGPU + // convert clip space position into texture coordinates for sampling scene grab textures fn getGrabScreenPos(clipPos: vec4) -> vec2 { var uv: vec2 = (clipPos.xy / clipPos.w) * 0.5 + vec2(0.5); diff --git a/src/platform/graphics/storage-buffer.js b/src/platform/graphics/storage-buffer.js index e070425481b..907dfb94300 100644 --- a/src/platform/graphics/storage-buffer.js +++ b/src/platform/graphics/storage-buffer.js @@ -27,13 +27,16 @@ class StorageBuffer { * of {@link BUFFERUSAGE_READ}, {@link BUFFERUSAGE_WRITE}, {@link BUFFERUSAGE_COPY_SRC} and * {@link BUFFERUSAGE_COPY_DST} flags. This parameter can be omitted if no special usage is * required. + * @param {boolean} [addStorageUsage] - If true, automatically adds BUFFERUSAGE_STORAGE flag. + * Set to false for staging buffers that use BUFFERUSAGE_WRITE. Defaults to true. */ - constructor(graphicsDevice, byteSize, bufferUsage = 0) { + constructor(graphicsDevice, byteSize, bufferUsage = 0, addStorageUsage = true) { this.device = graphicsDevice; this.byteSize = byteSize; this.bufferUsage = bufferUsage; - this.impl = graphicsDevice.createBufferImpl(BUFFERUSAGE_STORAGE | bufferUsage); + const usage = addStorageUsage ? (BUFFERUSAGE_STORAGE | bufferUsage) : bufferUsage; + this.impl = graphicsDevice.createBufferImpl(usage); this.impl.allocate(graphicsDevice, byteSize); this.device.buffers.push(this); @@ -106,6 +109,23 @@ class StorageBuffer { clear(offset = 0, size = this.byteSize) { this.impl.clear(this.device, offset, size); } + + /** + * Copy data from another storage buffer into this storage buffer. + * + * @param {StorageBuffer} srcBuffer - The source storage buffer to copy from. + * @param {number} [srcOffset] - The byte offset in the source buffer. Defaults to 0. + * @param {number} [dstOffset] - The byte offset in this buffer. Defaults to 0. + * @param {number} [size] - The byte size of data to copy. Defaults to the full size of the + * source buffer minus the source offset. + */ + copy(srcBuffer, srcOffset = 0, dstOffset = 0, size = srcBuffer.byteSize - srcOffset) { + Debug.assert(srcOffset + size <= srcBuffer.byteSize, 'Source copy range exceeds buffer size'); + Debug.assert(dstOffset + size <= this.byteSize, 'Destination copy range exceeds buffer size'); + + const commandEncoder = this.device.getCommandEncoder(); + commandEncoder.copyBufferToBuffer(srcBuffer.impl.buffer, srcOffset, this.impl.buffer, dstOffset, size); + } } export { StorageBuffer }; diff --git a/src/platform/graphics/upload-stream.js b/src/platform/graphics/upload-stream.js new file mode 100644 index 00000000000..4762787444c --- /dev/null +++ b/src/platform/graphics/upload-stream.js @@ -0,0 +1,64 @@ +/** + * @import { GraphicsDevice } from './graphics-device.js' + * @import { StorageBuffer } from './storage-buffer.js' + * @import { Texture } from './texture.js' + */ + +/** + * Manages non-blocking uploads of data to GPU resources (textures or storage buffers). + * Internally pools staging resources (PBOs on WebGL, staging buffers on WebGPU) to avoid blocking + * when the GPU is busy with previous uploads. + * + * Important: Create one UploadStream per target resource. + * + * @category Graphics + * @ignore + */ +class UploadStream { + /** + * Create a new UploadStream instance. + * + * @param {GraphicsDevice} device - The graphics device. + * @param {boolean} [useSingleBuffer] - If true, uses simple direct uploads (single texture on + * WebGL, direct write on WebGPU). If false (default), uses optimized multi-buffer strategy (PBOs + * with orphaning on WebGL, staging buffers on WebGPU) for potentially non-blocking uploads. + */ + constructor(device, useSingleBuffer = false) { + this.device = device; + this.useSingleBuffer = useSingleBuffer; + + // Create platform-specific implementation + this.impl = device.createUploadStreamImpl(this); + } + + /** + * Upload data to a texture (WebGL path) or storage buffer (WebGPU path). + * For WebGL textures, both offset and size must be multiples of the texture width (aligned to + * full rows). + * For WebGPU storage buffers, both offset and size byte values must be multiples of 4. + * + * @param {Uint8Array|Uint32Array|Float32Array} data - The data to upload. Must contain at least + * `size` elements. + * @param {Texture|StorageBuffer} target - The target resource (texture for WebGL, storage + * buffer for WebGPU). + * @param {number} [offset] - The element offset in the target where upload starts. Defaults to 0. + * For WebGL textures, must be a multiple of texture width. For WebGPU, the byte offset must be + * a multiple of 4. + * @param {number} [size] - The number of elements to upload. Defaults to data.length. + * For WebGL textures, must be a multiple of texture width. For WebGPU, the byte size must be + * a multiple of 4. + */ + upload(data, target, offset = 0, size = data.length) { + this.impl?.upload(data, target, offset, size); + } + + /** + * Destroy the upload stream and clean up all pooled resources. + */ + destroy() { + this.impl?.destroy(); + this.impl = null; + } +} + +export { UploadStream }; diff --git a/src/platform/graphics/webgl/webgl-graphics-device.js b/src/platform/graphics/webgl/webgl-graphics-device.js index d85e199cc6d..d78aa9bbf0d 100644 --- a/src/platform/graphics/webgl/webgl-graphics-device.js +++ b/src/platform/graphics/webgl/webgl-graphics-device.js @@ -36,6 +36,7 @@ import { WebglShader } from './webgl-shader.js'; import { WebglDrawCommands } from './webgl-draw-commands.js'; import { WebglTexture } from './webgl-texture.js'; import { WebglRenderTarget } from './webgl-render-target.js'; +import { WebglUploadStream } from './webgl-upload-stream.js'; import { BlendState } from '../blend-state.js'; import { DepthState } from '../depth-state.js'; import { StencilParameters } from '../stencil-parameters.js'; @@ -685,6 +686,10 @@ class WebglGraphicsDevice extends GraphicsDevice { return new WebglRenderTarget(); } + createUploadStreamImpl(uploadStream) { + return new WebglUploadStream(uploadStream); + } + // #if _DEBUG pushMarker(name) { if (platform.browser && window.spector) { diff --git a/src/platform/graphics/webgl/webgl-upload-stream.js b/src/platform/graphics/webgl/webgl-upload-stream.js new file mode 100644 index 00000000000..eedb93fc18e --- /dev/null +++ b/src/platform/graphics/webgl/webgl-upload-stream.js @@ -0,0 +1,182 @@ +import { Debug } from '../../../core/debug.js'; + +/** + * @import { UploadStream } from '../upload-stream.js' + * @import { Texture } from '../texture.js' + */ + +/** + * WebGL implementation of UploadStream. + * Can use either simple direct texture uploads or optimized PBO strategy with orphaning. + * + * @ignore + */ +class WebglUploadStream { + /** + * Available PBOs ready for immediate use. + * + * @type {Array<{pbo: WebGLBuffer, size: number}>} + */ + availablePBOs = []; + + /** + * PBOs currently in use by the GPU. + * + * @type {Array<{pbo: WebGLBuffer, size: number, sync: WebGLSync}>} + */ + pendingPBOs = []; + + /** + * @param {UploadStream} uploadStream - The upload stream. + */ + constructor(uploadStream) { + this.uploadStream = uploadStream; + this.useSingleBuffer = uploadStream.useSingleBuffer; + } + + destroy() { + // @ts-ignore - gl is available on WebglGraphicsDevice + const gl = this.uploadStream.device.gl; + this.availablePBOs.forEach(info => gl.deleteBuffer(info.pbo)); + this.pendingPBOs.forEach((item) => { + if (item.sync) gl.deleteSync(item.sync); + gl.deleteBuffer(item.pbo); + }); + } + + /** + * Update PBOs: poll completed ones and remove undersized buffers. + * + * @param {number} minByteSize - Minimum size for buffers to keep. Smaller buffers are destroyed. + */ + update(minByteSize) { + // @ts-ignore - gl is available on WebglGraphicsDevice + const gl = this.uploadStream.device.gl; + + // Poll pending PBOs + const pending = this.pendingPBOs; + for (let i = pending.length - 1; i >= 0; i--) { + const item = pending[i]; + + const result = gl.clientWaitSync(item.sync, 0, 0); + if (result === gl.CONDITION_SATISFIED || result === gl.ALREADY_SIGNALED) { + gl.deleteSync(item.sync); + this.availablePBOs.push({ pbo: item.pbo, size: item.size }); + pending.splice(i, 1); + } + } + + // Remove any available PBOs that are too small + const available = this.availablePBOs; + for (let i = available.length - 1; i >= 0; i--) { + if (available[i].size < minByteSize) { + gl.deleteBuffer(available[i].pbo); + available.splice(i, 1); + } + } + } + + /** + * Upload data to a texture using PBOs (optimized) or direct upload (simple). + * + * @param {Uint8Array|Uint32Array|Float32Array} data - The data to upload. + * @param {Texture} target - The target texture. + * @param {number} offset - The element offset in the target. Must be a multiple of texture width. + * @param {number} size - The number of elements to upload. Must be a multiple of texture width. + */ + upload(data, target, offset, size) { + if (this.useSingleBuffer) { + this.uploadDirect(data, target, offset, size); + } else { + this.uploadPBO(data, target, offset, size); + } + } + + /** + * Direct texture upload (simple, blocking). + * + * @param {Uint8Array|Uint32Array|Float32Array} data - The data to upload. + * @param {Texture} target - The target texture. + * @param {number} offset - The element offset in the target. + * @param {number} size - The number of elements to upload. + * @private + */ + uploadDirect(data, target, offset, size) { + Debug.assert(offset === 0, 'Direct texture upload with non-zero offset is not supported. Use PBO mode instead.'); + Debug.assert(target._levels); + + target._levels[0] = data; + target.upload(); + } + + /** + * PBO-based upload with orphaning (optimized, potentially non-blocking). + * + * @param {Uint8Array|Uint32Array|Float32Array} data - The data to upload. + * @param {import('../texture.js').Texture} target - The target texture. + * @param {number} offset - The element offset in the target. + * @param {number} size - The number of elements to upload. + * @private + */ + uploadPBO(data, target, offset, size) { + const device = this.uploadStream.device; + // @ts-ignore - gl is available on WebglGraphicsDevice + const gl = device.gl; + + const width = target.width; + const byteSize = size * data.BYTES_PER_ELEMENT; + + // Update PBOs + this.update(byteSize); + + // WebGL requires offset and size aligned to full rows for texSubImage2D + Debug.assert(offset % width === 0, `Upload offset (${offset}) must be a multiple of texture width (${width}) for row alignment`); + Debug.assert(size % width === 0, `Upload size (${size}) must be a multiple of texture width (${width}) for row alignment`); + + const startY = offset / width; + const height = size / width; + + // Get or create a PBO (guaranteed to be large enough after update) + const pboInfo = this.availablePBOs.pop() ?? (() => { + const pbo = gl.createBuffer(); + return { pbo, size: byteSize }; + })(); + + // Orphan + bufferSubData pattern + gl.bindBuffer(gl.PIXEL_UNPACK_BUFFER, pboInfo.pbo); + gl.bufferData(gl.PIXEL_UNPACK_BUFFER, byteSize, gl.STREAM_DRAW); + gl.bufferSubData(gl.PIXEL_UNPACK_BUFFER, 0, new Uint8Array(data.buffer, data.byteOffset, byteSize)); + + // Unbind PBO before setTexture + gl.bindBuffer(gl.PIXEL_UNPACK_BUFFER, null); + + // Ensure texture is created and bound + // @ts-ignore - setTexture is available on WebglGraphicsDevice + device.setTexture(target, 0); + + // Rebind PBO for texSubImage2D + gl.bindBuffer(gl.PIXEL_UNPACK_BUFFER, pboInfo.pbo); + + // Set pixel-store parameters (use device methods for cached state) + device.setUnpackFlipY(false); + device.setUnpackPremultiplyAlpha(false); + gl.pixelStorei(gl.UNPACK_ALIGNMENT, 1); + gl.pixelStorei(gl.UNPACK_ROW_LENGTH, 0); + gl.pixelStorei(gl.UNPACK_SKIP_ROWS, 0); + gl.pixelStorei(gl.UNPACK_SKIP_PIXELS, 0); + + // Copy from PBO to texture (GPU-side) + const impl = target.impl; + gl.texSubImage2D(gl.TEXTURE_2D, 0, 0, startY, width, height, impl._glFormat, impl._glPixelType, 0); + + gl.bindBuffer(gl.PIXEL_UNPACK_BUFFER, null); + + // Track for recycling + const sync = gl.fenceSync(gl.SYNC_GPU_COMMANDS_COMPLETE, 0); + this.pendingPBOs.push({ pbo: pboInfo.pbo, size: byteSize, sync }); + + gl.flush(); + } +} + +export { WebglUploadStream }; diff --git a/src/platform/graphics/webgpu/webgpu-graphics-device.js b/src/platform/graphics/webgpu/webgpu-graphics-device.js index 81da1a3d5c6..2b2adc02a9a 100644 --- a/src/platform/graphics/webgpu/webgpu-graphics-device.js +++ b/src/platform/graphics/webgpu/webgpu-graphics-device.js @@ -32,6 +32,7 @@ import { WebgpuCompute } from './webgpu-compute.js'; import { WebgpuBuffer } from './webgpu-buffer.js'; import { StorageBuffer } from '../storage-buffer.js'; import { WebgpuDrawCommands } from './webgpu-draw-commands.js'; +import { WebgpuUploadStream } from './webgpu-upload-stream.js'; /** * @import { RenderPass } from '../render-pass.js' @@ -516,6 +517,10 @@ class WebgpuGraphicsDevice extends GraphicsDevice { return new WebgpuRenderTarget(renderTarget); } + createUploadStreamImpl(uploadStream) { + return new WebgpuUploadStream(uploadStream); + } + createBindGroupFormatImpl(bindGroupFormat) { return new WebgpuBindGroupFormat(bindGroupFormat); } diff --git a/src/platform/graphics/webgpu/webgpu-upload-stream.js b/src/platform/graphics/webgpu/webgpu-upload-stream.js new file mode 100644 index 00000000000..977a1c79fee --- /dev/null +++ b/src/platform/graphics/webgpu/webgpu-upload-stream.js @@ -0,0 +1,162 @@ +import { Debug } from '../../../core/debug.js'; + +/** + * @import { UploadStream } from '../upload-stream.js' + */ + +/** + * WebGPU implementation of UploadStream. + * Can use either simple direct writes or optimized staging buffer strategy. + * + * @ignore + */ +class WebgpuUploadStream { + /** + * Available staging buffers ready for immediate use. + * + * @type {Array} + */ + availableStagingBuffers = []; + + /** + * Staging buffers currently in use by the GPU. + * + * @type {Array} + */ + pendingStagingBuffers = []; + + _destroyed = false; + + /** + * @param {UploadStream} uploadStream - The upload stream. + */ + constructor(uploadStream) { + this.uploadStream = uploadStream; + this.useSingleBuffer = uploadStream.useSingleBuffer; + } + + destroy() { + this._destroyed = true; + this.availableStagingBuffers.forEach(buffer => buffer.destroy()); + this.pendingStagingBuffers.forEach(buffer => buffer.destroy()); + } + + /** + * Update staging buffers: recycle completed ones and remove undersized buffers. + * + * @param {number} minByteSize - Minimum size for buffers to keep. Smaller buffers are destroyed. + */ + update(minByteSize) { + + // map all pending buffers + const pending = this.pendingStagingBuffers; + for (let i = 0; i < pending.length; i++) { + const buffer = pending[i]; + buffer.mapAsync(GPUMapMode.WRITE).then(() => { + if (!this._destroyed) { + this.availableStagingBuffers.push(buffer); + } else { + buffer.destroy(); + } + }); + } + pending.length = 0; + + // remove any available buffers that are too small + const available = this.availableStagingBuffers; + for (let i = available.length - 1; i >= 0; i--) { + if (available[i].size < minByteSize) { + available[i].destroy(); + available.splice(i, 1); + } + } + } + + /** + * Upload data to a storage buffer using staging buffers (optimized) or direct write (simple). + * + * @param {Uint8Array|Uint32Array|Float32Array} data - The data to upload. + * @param {import('../storage-buffer.js').StorageBuffer} target - The target storage buffer. + * @param {number} offset - The element offset in the target. Byte offset must be a multiple of 4. + * @param {number} size - The number of elements to upload. Byte size must be a multiple of 4. + */ + upload(data, target, offset, size) { + if (this.useSingleBuffer) { + // simple path: direct write (blocking) + this.uploadDirect(data, target, offset, size); + } else { + // optimized path: staging buffers (non-blocking) + this.uploadStaging(data, target, offset, size); + } + } + + /** + * Direct storage buffer write (simple, blocking). + * + * @param {Uint8Array|Uint32Array|Float32Array} data - The data to upload. + * @param {import('../storage-buffer.js').StorageBuffer} target - The target storage buffer. + * @param {number} offset - The element offset in the target. + * @param {number} size - The number of elements to upload. + * @private + */ + uploadDirect(data, target, offset, size) { + const byteOffset = offset * data.BYTES_PER_ELEMENT; + const byteSize = size * data.BYTES_PER_ELEMENT; + + // WebGPU requires 4-byte alignment for buffer operations + Debug.assert(byteOffset % 4 === 0, `WebGPU upload offset in bytes (${byteOffset}) must be a multiple of 4`); + Debug.assert(byteSize % 4 === 0, `WebGPU upload size in bytes (${byteSize}) must be a multiple of 4`); + target.write(byteOffset, data, 0, size); + } + + /** + * Staging buffer-based upload. + * + * @param {Uint8Array|Uint32Array|Float32Array} data - The data to upload. + * @param {import('../storage-buffer.js').StorageBuffer} target - The target storage buffer. + * @param {number} offset - The element offset in the target. + * @param {number} size - The number of elements to upload. + * @private + */ + uploadStaging(data, target, offset, size) { + const device = this.uploadStream.device; + + const byteOffset = offset * data.BYTES_PER_ELEMENT; + const byteSize = size * data.BYTES_PER_ELEMENT; + + // Update staging buffers + this.update(byteSize); + + // WebGPU copyBufferToBuffer requires offset and size to be multiples of 4 bytes + Debug.assert(byteOffset % 4 === 0, `WebGPU upload offset in bytes (${byteOffset}) must be a multiple of 4 for copyBufferToBuffer`); + Debug.assert(byteSize % 4 === 0, `WebGPU upload size in bytes (${byteSize}) must be a multiple of 4 for copyBufferToBuffer`); + + // Get or create a staging buffer (guaranteed to be large enough after recycling) + const buffer = this.availableStagingBuffers.pop() ?? (() => { + // @ts-ignore - wgpu is available on WebgpuGraphicsDevice + return this.uploadStream.device.wgpu.createBuffer({ + size: byteSize, + usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC, + mappedAtCreation: true + }); + })(); + + // Write to mapped range (non-blocking) + const mappedRange = buffer.getMappedRange(); + new Uint8Array(mappedRange).set(new Uint8Array(data.buffer, data.byteOffset, byteSize)); + buffer.unmap(); + + // Copy from staging to storage buffer (GPU-side) + // @ts-ignore - getCommandEncoder is available on WebgpuGraphicsDevice + device.getCommandEncoder().copyBufferToBuffer( + buffer, 0, + target.impl.buffer, byteOffset, + byteSize + ); + + // Track for recycling + this.pendingStagingBuffers.push(buffer); + } +} + +export { WebgpuUploadStream }; diff --git a/src/scene/gsplat-unified/gsplat-manager.js b/src/scene/gsplat-unified/gsplat-manager.js index 4d6cc2ff09f..58d340ade46 100644 --- a/src/scene/gsplat-unified/gsplat-manager.js +++ b/src/scene/gsplat-unified/gsplat-manager.js @@ -312,11 +312,14 @@ class GSplatManager { } // number of splats to render - this.renderer.setNumSplats(count); + this.renderer.update(count, textureSize); } // update order texture this.workBuffer.setOrderData(orderData); + + // update renderer with new order data + this.renderer.frameUpdate(); } } diff --git a/src/scene/gsplat-unified/gsplat-renderer.js b/src/scene/gsplat-unified/gsplat-renderer.js index 9343fb7469d..2875c51e1d2 100644 --- a/src/scene/gsplat-unified/gsplat-renderer.js +++ b/src/scene/gsplat-unified/gsplat-renderer.js @@ -59,6 +59,7 @@ class GSplatRenderer { // input format this._material.setDefine('GSPLAT_WORKBUFFER_DATA', true); + this._material.setDefine('STORAGE_ORDER', device.isWebGPU); // input textures (work buffer textures) this._material.setParameter('splatColor', workBuffer.colorTexture); @@ -70,7 +71,12 @@ class GSplatRenderer { // set instance properties const dither = false; this._material.setParameter('numSplats', 0); - this._material.setParameter('splatOrder', workBuffer.orderTexture); + + // Set order data - texture for WebGL only at init time, it does not need to be updated + if (workBuffer.orderTexture) { + this._material.setParameter('splatOrder', workBuffer.orderTexture); + } + this._material.setParameter('alphaClip', 0.3); this._material.setDefine(`DITHER_${dither ? 'BLUENOISE' : 'NONE'}`, ''); this._material.cull = CULLFACE_NONE; @@ -88,18 +94,29 @@ class GSplatRenderer { this.meshInstance.destroy(); } - setNumSplats(count) { + update(count, textureSize) { // limit splat render count to exclude those behind the camera this.meshInstance.instancingCount = Math.ceil(count / GSplatResourceBase.instanceSize); // update splat count on the material this._material.setParameter('numSplats', count); + this._material.setParameter('splatTextureSize', textureSize); // disable rendering if no splats to render this.meshInstance.visible = count > 0; } + frameUpdate() { + + // Set the appropriate order data resource based on device type + if (this.device.isWebGPU) { + this._material.setParameter('splatOrder', this.workBuffer.orderBuffer); + } else { + this._material.setParameter('splatOrder', this.workBuffer.orderTexture); + } + } + setMaxNumSplats(numSplats) { // round up to the nearest multiple of instanceSize (same as createInstanceIndices does internally) @@ -114,6 +131,9 @@ class GSplatRenderer { // create new instance indices this.instanceIndices = GSplatResourceBase.createInstanceIndices(this.device, numSplats); this.meshInstance.setInstancing(this.instanceIndices, true); + + // update texture size uniform + this._material.setParameter('splatTextureSize', this.workBuffer.textureSize); } } diff --git a/src/scene/gsplat-unified/gsplat-work-buffer.js b/src/scene/gsplat-unified/gsplat-work-buffer.js index a4e7da87bb9..e0d6953ca87 100644 --- a/src/scene/gsplat-unified/gsplat-work-buffer.js +++ b/src/scene/gsplat-unified/gsplat-work-buffer.js @@ -1,7 +1,9 @@ import { Debug } from '../../core/debug.js'; -import { ADDRESS_CLAMP_TO_EDGE, FILTER_NEAREST, PIXELFORMAT_R32U, PIXELFORMAT_RGBA16F } from '../../platform/graphics/constants.js'; +import { ADDRESS_CLAMP_TO_EDGE, FILTER_NEAREST, PIXELFORMAT_R32U, PIXELFORMAT_RGBA16F, BUFFERUSAGE_COPY_DST } from '../../platform/graphics/constants.js'; import { RenderTarget } from '../../platform/graphics/render-target.js'; +import { StorageBuffer } from '../../platform/graphics/storage-buffer.js'; import { Texture } from '../../platform/graphics/texture.js'; +import { UploadStream } from '../../platform/graphics/upload-stream.js'; import { GSplatWorkBufferRenderPass } from './gsplat-work-buffer-render-pass.js'; let id = 0; @@ -40,6 +42,15 @@ class GSplatWorkBuffer { /** @type {Texture} */ orderTexture; + /** @type {StorageBuffer} */ + orderBuffer; + + /** @type {number} */ + _textureSize = 1; + + /** @type {UploadStream} */ + uploadStream; + /** @type {GSplatWorkBufferRenderPass} */ renderPass; @@ -58,7 +69,15 @@ class GSplatWorkBuffer { flipY: true }); - this.orderTexture = this.createTexture('SplatGlobalOrder', PIXELFORMAT_R32U, 1, 1); + // Create upload stream for non-blocking uploads + this.uploadStream = new UploadStream(device); + + // Use storage buffer on WebGPU, texture on WebGL + if (device.isWebGPU) { + this.orderBuffer = new StorageBuffer(device, 4, BUFFERUSAGE_COPY_DST); + } else { + this.orderTexture = this.createTexture('SplatGlobalOrder', PIXELFORMAT_R32U, 1, 1); + } // Create the optimized render pass for batched splat rendering this.renderPass = new GSplatWorkBufferRenderPass(device); @@ -72,23 +91,23 @@ class GSplatWorkBuffer { this.covBTexture?.destroy(); this.centerTexture?.destroy(); this.orderTexture?.destroy(); + this.orderBuffer?.destroy(); this.renderTarget?.destroy(); + this.uploadStream.destroy(); } get textureSize() { - return this.orderTexture.width; + return this._textureSize; } setOrderData(data) { - - const len = this.orderTexture.width * this.orderTexture.height; - if (len !== data.length) { - Debug.error('setOrderData: data length mismatch, got:', data.length, 'expected:', len, `(${this.orderTexture.width}x${this.orderTexture.height})`); + if (this.device.isWebGPU) { + Debug.assert(data.length <= this._textureSize * this._textureSize); + this.uploadStream.upload(data, this.orderBuffer, 0, data.length); + } else { + Debug.assert(data.length === this._textureSize * this._textureSize); + this.uploadStream.upload(data, this.orderTexture, 0, data.length); } - - // upload data to texture - this.orderTexture._levels[0] = data; - this.orderTexture.upload(); } createTexture(name, format, w, h) { @@ -112,7 +131,17 @@ class GSplatWorkBuffer { resize(textureSize) { Debug.assert(textureSize); this.renderTarget.resize(textureSize, textureSize); - this.orderTexture.resize(textureSize, textureSize); + this._textureSize = textureSize; + + if (this.device.isWebGPU) { + const newByteSize = textureSize * textureSize * 4; + if (this.orderBuffer.byteSize < newByteSize) { + this.orderBuffer.destroy(); + this.orderBuffer = new StorageBuffer(this.device, newByteSize, BUFFERUSAGE_COPY_DST); + } + } else { + this.orderTexture.resize(textureSize, textureSize); + } } /** diff --git a/src/scene/gsplat/gsplat-instance.js b/src/scene/gsplat/gsplat-instance.js index 2398d2d6208..cc50fba97c5 100644 --- a/src/scene/gsplat/gsplat-instance.js +++ b/src/scene/gsplat/gsplat-instance.js @@ -75,7 +75,7 @@ class GSplatInstance { this._material = options.material; // patch splat order - this._material.setParameter('splatOrder', this.orderTexture); + this.setMaterialOrderTexture(this._material); } else { // construct the material this._material = new ShaderMaterial({ @@ -131,6 +131,16 @@ class GSplatInstance { this.sorter?.destroy(); } + /** + * Set order data parameters on the material. + * + * @param {ShaderMaterial} material - The material to configure. + */ + setMaterialOrderTexture(material) { + material.setParameter('splatOrder', this.orderTexture); + material.setParameter('splatTextureSize', this.orderTexture.width); + } + /** * @param {ShaderMaterial} value - The material instance. */ @@ -140,7 +150,7 @@ class GSplatInstance { this._material = value; // patch order texture - this._material.setParameter('splatOrder', this.orderTexture); + this.setMaterialOrderTexture(this._material); if (this.meshInstance) { this.meshInstance.material = value; @@ -165,7 +175,7 @@ class GSplatInstance { // set instance properties material.setParameter('numSplats', 0); - material.setParameter('splatOrder', this.orderTexture); + this.setMaterialOrderTexture(material); material.setParameter('alphaClip', 0.3); material.setDefine(`DITHER_${options.dither ? 'BLUENOISE' : 'NONE'}`, ''); material.cull = CULLFACE_NONE; diff --git a/src/scene/shader-lib/glsl/chunks/gsplat/vert/gsplatSource.js b/src/scene/shader-lib/glsl/chunks/gsplat/vert/gsplatSource.js index 14bfc5b79e0..bd02071dc83 100644 --- a/src/scene/shader-lib/glsl/chunks/gsplat/vert/gsplatSource.js +++ b/src/scene/shader-lib/glsl/chunks/gsplat/vert/gsplatSource.js @@ -3,12 +3,11 @@ attribute vec3 vertex_position; // xy: cornerUV, z: render order offset attribute uint vertex_id_attrib; // render order base uniform uint numSplats; // total number of splats +uniform uint splatTextureSize; // texture size for splat data uniform highp usampler2D splatOrder; // per-splat index to source gaussian // initialize the splat source structure bool initSource(out SplatSource source) { - uint w = uint(textureSize(splatOrder, 0).x); - // calculate splat order source.order = vertex_id_attrib + uint(vertex_position.z); @@ -17,13 +16,13 @@ bool initSource(out SplatSource source) { return false; } - ivec2 orderUV = ivec2(source.order % w, source.order / w); + ivec2 orderUV = ivec2(source.order % splatTextureSize, source.order / splatTextureSize); // read splat id source.id = texelFetch(splatOrder, orderUV, 0).r; // map id to uv - source.uv = ivec2(source.id % w, source.id / w); + source.uv = ivec2(source.id % splatTextureSize, source.id / splatTextureSize); // get the corner source.cornerUV = vertex_position.xy; diff --git a/src/scene/shader-lib/wgsl/chunks/gsplat/vert/gsplatSource.js b/src/scene/shader-lib/wgsl/chunks/gsplat/vert/gsplatSource.js index e8eef8eb31e..6b957af5bd6 100644 --- a/src/scene/shader-lib/wgsl/chunks/gsplat/vert/gsplatSource.js +++ b/src/scene/shader-lib/wgsl/chunks/gsplat/vert/gsplatSource.js @@ -3,12 +3,17 @@ attribute vertex_position: vec3f; // xy: cornerUV, z: render order offse attribute vertex_id_attrib: u32; // render order base uniform numSplats: u32; // total number of splats -var splatOrder: texture_2d; // per-splat index to source gaussian +uniform splatTextureSize: u32; // texture size for splat data + +#ifdef STORAGE_ORDER + var splatOrder: array; +#else + // support texture for non-unified gsplat rendering + var splatOrder: texture_2d; +#endif // initialize the splat source structure fn initSource(source: ptr) -> bool { - let w: u32 = textureDimensions(splatOrder, 0).x; - // calculate splat order source.order = vertex_id_attrib + u32(vertex_position.z); @@ -17,13 +22,16 @@ fn initSource(source: ptr) -> bool { return false; } - let orderUV = vec2i(vec2u(source.order % w, source.order / w)); - // read splat id - source.id = textureLoad(splatOrder, orderUV, 0).r; + #ifdef STORAGE_ORDER + source.id = splatOrder[source.order]; + #else + let uv = vec2u(source.order % uniform.splatTextureSize, source.order / uniform.splatTextureSize); + source.id = textureLoad(splatOrder, vec2i(uv), 0).r; + #endif // map id to uv - source.uv = vec2i(vec2u(source.id % w, source.id / w)); + source.uv = vec2i(vec2u(source.id % uniform.splatTextureSize, source.id / uniform.splatTextureSize)); // get the corner source.cornerUV = vertex_position.xy; From 0c5362a5e769e82865ad81e78d48fe3a9047eb2d Mon Sep 17 00:00:00 2001 From: Martin Valigursky Date: Tue, 14 Oct 2025 14:09:52 +0100 Subject: [PATCH 2/6] fix type issue --- src/platform/graphics/webgpu/webgpu-dynamic-buffer.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/platform/graphics/webgpu/webgpu-dynamic-buffer.js b/src/platform/graphics/webgpu/webgpu-dynamic-buffer.js index 4f4e647c8d2..92d661d751e 100644 --- a/src/platform/graphics/webgpu/webgpu-dynamic-buffer.js +++ b/src/platform/graphics/webgpu/webgpu-dynamic-buffer.js @@ -3,7 +3,7 @@ import { DynamicBuffer } from '../dynamic-buffer.js'; class WebgpuDynamicBuffer extends DynamicBuffer { /** - * @type {GPUBuffer} + * type {GPUBuffer} * @private */ buffer = null; From 27aeec003ee00305acca575b073deb2d206805c2 Mon Sep 17 00:00:00 2001 From: Martin Valigursky Date: Tue, 14 Oct 2025 14:19:41 +0100 Subject: [PATCH 3/6] type fix --- src/platform/graphics/webgpu/webgpu-upload-stream.js | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/platform/graphics/webgpu/webgpu-upload-stream.js b/src/platform/graphics/webgpu/webgpu-upload-stream.js index 977a1c79fee..81a4aec9a8f 100644 --- a/src/platform/graphics/webgpu/webgpu-upload-stream.js +++ b/src/platform/graphics/webgpu/webgpu-upload-stream.js @@ -14,14 +14,16 @@ class WebgpuUploadStream { /** * Available staging buffers ready for immediate use. * - * @type {Array} + * @type {GPUBuffer[]} + * @private */ availableStagingBuffers = []; /** * Staging buffers currently in use by the GPU. * - * @type {Array} + * @type {GPUBuffer[]} + * @private */ pendingStagingBuffers = []; From a64df2c4c5cab92f2fbfdd8ff44c2f0a260ab7a7 Mon Sep 17 00:00:00 2001 From: Martin Valigursky Date: Tue, 14 Oct 2025 16:52:50 +0100 Subject: [PATCH 4/6] removed test change --- src/platform/graphics/webgpu/webgpu-dynamic-buffer.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/platform/graphics/webgpu/webgpu-dynamic-buffer.js b/src/platform/graphics/webgpu/webgpu-dynamic-buffer.js index 92d661d751e..4f4e647c8d2 100644 --- a/src/platform/graphics/webgpu/webgpu-dynamic-buffer.js +++ b/src/platform/graphics/webgpu/webgpu-dynamic-buffer.js @@ -3,7 +3,7 @@ import { DynamicBuffer } from '../dynamic-buffer.js'; class WebgpuDynamicBuffer extends DynamicBuffer { /** - * type {GPUBuffer} + * @type {GPUBuffer} * @private */ buffer = null; From 45c6c8a6295bf978078fffb8a1d7f30cfcfec60a Mon Sep 17 00:00:00 2001 From: Martin Valigursky Date: Tue, 14 Oct 2025 16:53:09 +0100 Subject: [PATCH 5/6] added more default lod levels --- src/framework/components/gsplat/component.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/framework/components/gsplat/component.js b/src/framework/components/gsplat/component.js index 517f6043b60..7d3ef844d2d 100644 --- a/src/framework/components/gsplat/component.js +++ b/src/framework/components/gsplat/component.js @@ -78,7 +78,7 @@ class GSplatComponent extends Component { * @type {number[]|null} * @private */ - _lodDistances = [5, 10, 15, 20, 25]; + _lodDistances = [5, 10, 15, 20, 25, 30, 35, 40]; /** * @type {BoundingBox|null} From 99fbfcc8159d05a357765e6852e640cae7609606 Mon Sep 17 00:00:00 2001 From: Martin Valigursky Date: Wed, 15 Oct 2025 10:39:35 +0100 Subject: [PATCH 6/6] handle case of multiple sorted results in a single frame --- .../graphics/webgpu/webgpu-upload-stream.js | 15 ++++++-- src/scene/gsplat-unified/gsplat-manager.js | 24 +++++++++---- .../gsplat-unified/gsplat-unified-sorter.js | 35 +++++++++++++++++-- 3 files changed, 63 insertions(+), 11 deletions(-) diff --git a/src/platform/graphics/webgpu/webgpu-upload-stream.js b/src/platform/graphics/webgpu/webgpu-upload-stream.js index 81a4aec9a8f..8b8bb3c89b0 100644 --- a/src/platform/graphics/webgpu/webgpu-upload-stream.js +++ b/src/platform/graphics/webgpu/webgpu-upload-stream.js @@ -1,9 +1,11 @@ -import { Debug } from '../../../core/debug.js'; +import { Debug, DebugHelper } from '../../../core/debug.js'; /** * @import { UploadStream } from '../upload-stream.js' */ +let id = 0; + /** * WebGPU implementation of UploadStream. * Can use either simple direct writes or optimized staging buffer strategy. @@ -136,11 +138,13 @@ class WebgpuUploadStream { // Get or create a staging buffer (guaranteed to be large enough after recycling) const buffer = this.availableStagingBuffers.pop() ?? (() => { // @ts-ignore - wgpu is available on WebgpuGraphicsDevice - return this.uploadStream.device.wgpu.createBuffer({ + const newBuffer = this.uploadStream.device.wgpu.createBuffer({ size: byteSize, usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC, mappedAtCreation: true }); + DebugHelper.setLabel(newBuffer, `UploadStream-Staging-${id++}`); + return newBuffer; })(); // Write to mapped range (non-blocking) @@ -156,6 +160,13 @@ class WebgpuUploadStream { byteSize ); + // Detect multiple uploads per frame (indicates command buffer hasn't been submitted yet) + Debug.assert( + this.pendingStagingBuffers.length === 0, + 'Multiple WebGPU staging buffer uploads detected in the same frame before command buffer submission. ' + + 'This can cause "buffer used while mapped" errors. Ensure only one upload occurs per frame.' + ); + // Track for recycling this.pendingStagingBuffers.push(buffer); } diff --git a/src/scene/gsplat-unified/gsplat-manager.js b/src/scene/gsplat-unified/gsplat-manager.js index 58d340ade46..5ad4000adaf 100644 --- a/src/scene/gsplat-unified/gsplat-manager.js +++ b/src/scene/gsplat-unified/gsplat-manager.js @@ -271,15 +271,17 @@ class GSplatManager { onSorted(count, version, orderData) { - this.sortedVersion = version; - - // remove old state - const oldState = this.worldStates.get(version - 1); - if (oldState) { - this.worldStates.delete(version - 1); - oldState.destroy(); + // remove all old states between last sorted version and current version + for (let v = this.sortedVersion + 1; v < version; v++) { + const oldState = this.worldStates.get(v); + if (oldState) { + this.worldStates.delete(v); + oldState.destroy(); + } } + this.sortedVersion = version; + // find the world state that has been sorted const worldState = this.worldStates.get(version); Debug.assert(worldState, `World state with version ${version} not found`); @@ -302,6 +304,11 @@ class GSplatManager { const colorize = this.scene.gsplat.colorizeLod; this.workBuffer.render(worldState.splats, this.cameraNode, colorize ? _lodColorsRaw : undefined); + // update all splats to sync their transforms (prevents redundant re-render later) + worldState.splats.forEach((splat) => { + splat.update(); + }); + // apply pending file-release requests if (worldState.pendingReleases && worldState.pendingReleases.length) { for (const [octree, fileIndex] of worldState.pendingReleases) { @@ -359,6 +366,9 @@ class GSplatManager { update() { + // apply any pending sorted results + this.sorter.applyPendingSorted(); + let fullUpdate = false; this.framesTillFullUpdate--; if (this.framesTillFullUpdate <= 0) { diff --git a/src/scene/gsplat-unified/gsplat-unified-sorter.js b/src/scene/gsplat-unified/gsplat-unified-sorter.js index ae0429bb1d7..492fed95563 100644 --- a/src/scene/gsplat-unified/gsplat-unified-sorter.js +++ b/src/scene/gsplat-unified/gsplat-unified-sorter.js @@ -15,6 +15,14 @@ class GSplatUnifiedSorter extends EventHandler { // true if we have new version to process hasNewVersion = false; + /** + * Pending sorted result to be applied next frame. If multiple sorted results are received from + * the worker, the latest result is stored here. + * + * @type {{ count: number, version: number, orderData: Uint32Array }|null} + */ + pendingSorted = null; + /** @type {Set} */ centersSet = new Set(); @@ -51,9 +59,31 @@ class GSplatUnifiedSorter extends EventHandler { // decrement jobs in flight counter this.jobsInFlight--; - this.fire('sorted', msgData.count, msgData.version, orderData); + // if there's already a pending result, return its orderData to the pool + if (this.pendingSorted) { + this.releaseOrderData(this.pendingSorted.orderData); + } + + // store the result to be available + this.pendingSorted = { + count: msgData.count, + version: msgData.version, + orderData: orderData + }; + } + + applyPendingSorted() { + if (this.pendingSorted) { + const { count, version, orderData } = this.pendingSorted; + this.pendingSorted = null; + this.fire('sorted', count, version, orderData); + + // reuse order data + this.releaseOrderData(orderData); + } + } - // reuse order data + releaseOrderData(orderData) { if (orderData.length === this.bufferLength) { this.availableOrderData.push(orderData); } @@ -61,6 +91,7 @@ class GSplatUnifiedSorter extends EventHandler { destroy() { this._destroyed = true; + this.pendingSorted = null; this.worker.terminate(); this.worker = null; }