UDocumentation UE5.7 10.02.2026 (Source)
API documentation for Unreal Engine 5.7
InstanceCullingLoadBalancer.h
Go to the documentation of this file.
1// Copyright Epic Games, Inc. All Rights Reserved.
2
3#pragma once
4
5#include "HAL/Platform.h"
7#include "RendererInterface.h"
9#include "RenderGraphUtils.h"
10
12
13/*
14 * Helper to build the data needed to run per-instance operations on the GPU in a balanced way
15 */
17{
18public:
// Thread group size constant; it is tied to PrefixBits by the static_assert below.
19 static constexpr uint32 ThreadGroupSize = 64U;
20
21 // Number of bits needed for prefix sum storage (hard-coded; must equal log2 of ThreadGroupSize)
22 static constexpr uint32 PrefixBits = 6U;//ILog2Const(uint32(ThreadGroupSize));
23 static_assert((1U << PrefixBits) == ThreadGroupSize, "ThreadGroupSize and PrefixBits must be kept in sync");
// Mask selecting the low PrefixBits bits of a packed word.
24 static constexpr uint32 PrefixBitMask = (1U << PrefixBits) - 1U;
25
// Width of a packed item/instance count: one bit wider than PrefixBits, so that a count
// equal to ThreadGroupSize itself can still be represented.
26 static constexpr uint32 NumInstancesItemBits = PrefixBits + 1U;
// Mask selecting the low NumInstancesItemBits bits of a packed word.
27 static constexpr uint32 NumInstancesItemMask = (1U << NumInstancesItemBits) - 1U;
28
29
34
35 FPackedBatch PackBatch(uint32 FirstItem, uint32 NumItems)
36 {
37 checkSlow(NumItems < (1U << NumInstancesItemBits));
38 checkSlow(FirstItem < (1U << (32U - NumInstancesItemBits)));
39
40 return FPackedBatch{ (FirstItem << NumInstancesItemBits) | (NumItems & NumInstancesItemMask) };
41 }
42
44 {
45 // packed 32-NumInstancesItemBits:NumInstancesItemBits - need one more bit for the case where one item has ThreadGroupSize work to do
47 // packed 32-PrefixBits:PrefixBits
49 };
// Packs one work item into the two 32-bit words of an FPackedItem:
//   word 0: InstanceDataOffset in the upper (32 - NumInstancesItemBits) bits,
//           NumInstances in the lower NumInstancesItemBits bits;
//   word 1: Payload in the upper (32 - PrefixBits) bits,
//           BatchPrefixSum in the lower PrefixBits bits.
// NumInstances and BatchPrefixSum are masked to their field widths; InstanceDataOffset and
// Payload are only shifted, so the checkSlow range checks are what guard them against overflow.
50 FPackedItem PackItem(uint32 InstanceDataOffset, uint32 NumInstances, uint32 Payload, uint32 BatchPrefixSum)
51 {
52 checkSlow(NumInstances < (1U << NumInstancesItemBits));
53 checkSlow(InstanceDataOffset < (1U << (32U - NumInstancesItemBits)));
// NOTE(review): the listing jumps from line 53 to line 55, so one source line is absent from this
// extract. No range check on BatchPrefixSum is visible here - confirm against the real header.
55 checkSlow(Payload < (1U << (32U - PrefixBits)));
56
57 return FPackedItem
58 {
59 (InstanceDataOffset << NumInstancesItemBits) | (NumInstances & NumInstancesItemMask),
60 (Payload << PrefixBits) | (BatchPrefixSum & PrefixBitMask)
61 };
62 }
63
65 SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer< FPackedBatch >, BatchBuffer)
66 SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer< FPackedItem >, ItemBuffer)
67 SHADER_PARAMETER(uint32, NumBatches)
68 SHADER_PARAMETER(uint32, NumItems)
69 SHADER_PARAMETER(uint32, NumGroupsPerBatch)
71
72 /*
73 * Publish constants to a shader implementing a kernel using the load balancer.
74 * Call from ModifyCompilationEnvironment
75 */
77
79 {
80 int32 NumBatches = 0;
81 int32 NumItems = 0;
82 // Optional to allow launching multiple groups that all get the same batch on the shader side
83 int32 NumGroupsPerBatch = 1;
84 FRDGBufferRef BatchBuffer = nullptr;
85 FRDGBufferRef ItemBuffer = nullptr;
86
87 void GetShaderParameters(FRDGBuilder& GraphBuilder, FShaderParameters& ShaderParameters) const;
88
93 template <typename TShaderClass>
95 FRDGBuilder& GraphBuilder,
96 FRDGEventName&& PassName,
98 typename TShaderClass::FParameters* Parameters) const
99 {
100 GetShaderParameters(GraphBuilder, Parameters->LoadBalancerParameters);
101 return FComputeShaderUtils::AddPass(GraphBuilder, Forward<FRDGEventName>(PassName), ComputeShader, Parameters, GetWrappedCsGroupCount());
102 }
103
105
106 };
107
109
111};
112
113/*
114 * Helper to build the data needed to run per-instance operations on the GPU in a balanced way
115 */
116template <typename InAllocatorType = FDefaultAllocator>
118{
119public:
121
122 void ReserveStorage(int32 NumBatches, int32 NumItems)
123 {
124 Data->Batches.Empty(NumBatches);
125 Data->Items.Empty(NumItems);
126 }
127
165
166 bool IsEmpty() const
167 {
168 return Data->Items.IsEmpty();
169 }
170
// Non-const upload entry point: returns an FGPUData built from this balancer's batch and item arrays.
// NumGroupsPerBatch defaults to 1.
// NOTE(review): listing lines 173 and 180 are absent from this extract, so the first statement and
// the return statement are not visible. The references below are presumably consumed by the
// missing return line - confirm against the real header.
171 FGPUData Upload(FRDGBuilder& GraphBuilder, int32 NumGroupsPerBatch = 1)
172 {
174
175 const auto& LocalBatches = Data->Batches;
176 const auto& LocalItems = Data->Items;
177
// Constructs a TRefCountPtr<FData> copy of Data through the graph builder's allocator.
// Presumably this keeps the arrays alive for as long as the graph may read them - confirm.
178 GraphBuilder.AllocObject<TRefCountPtr<FData>>(Data);
179
181 }
182
183 /* Const variant that assumes the batches have already been finalized */
// Returns an FGPUData built from this balancer's batch and item arrays; NumGroupsPerBatch defaults to 1.
// NOTE(review): listing lines 186 and 193 are absent from this extract, so the first statement and
// the return statement are not visible. The references below are presumably consumed by the
// missing return line - confirm against the real header.
184 FGPUData UploadFinalized(FRDGBuilder& GraphBuilder, int32 NumGroupsPerBatch = 1) const
185 {
187
188 const auto& LocalBatches = Data->Batches;
189 const auto& LocalItems = Data->Items;
190
// Constructs a TRefCountPtr<FData> copy of Data through the graph builder's allocator.
// Presumably this keeps the arrays alive for as long as the graph may read them - confirm.
191 GraphBuilder.AllocObject<TRefCountPtr<FData>>(Data);
192
194 }
195
200 {
201 if (CurrentBatchNumItems != 0)
202 {
205 }
206 }
207
213 FIntVector GetWrappedCsGroupCount(int32 NumGroupsPerBatch = 1) const
214 {
215 return FInstanceCullingLoadBalancerBase::GetWrappedCsGroupCount(Data->Batches, NumGroupsPerBatch);
216 }
217
219 {
221 return Data->Batches;
222 };
223
225 {
227 return Data->Items;
228 }
229
231
232 template <typename AllocatorType>
234 {
235 Data->Batches.Append(Other.GetBatches().GetData(), Other.GetBatches().Num());
236 Data->Items.Append(Other.GetItems().GetData(), Other.GetItems().Num());
237 TotalInstances += Other.GetTotalNumInstances();
238 }
239
241 {
242 return TotalInstances == Data->Items.Num();
243 }
244
245protected:
251
253
259};
#define checkSlow(expr)
Definition AssertionMacros.h:332
#define check(expr)
Definition AssertionMacros.h:314
FPlatformTypes::int32 int32
A 32-bit signed integer.
Definition Platform.h:1125
UE_FORCEINLINE_HINT TSharedRef< CastToType, Mode > StaticCastSharedRef(TSharedRef< CastFromType, Mode > const &InSharedRef)
Definition SharedPointer.h:127
ERDGInitialDataFlags
Definition RenderGraphDefinitions.h:274
#define SHADER_PARAMETER_RDG_BUFFER_SRV(ShaderType, MemberName)
Definition ShaderParameterMacros.h:1800
#define BEGIN_SHADER_PARAMETER_STRUCT(StructTypeName, DllStorage)
Definition ShaderParameterMacros.h:1482
#define END_SHADER_PARAMETER_STRUCT()
Definition ShaderParameterMacros.h:1485
#define SHADER_PARAMETER(MemberType, MemberName)
Definition ShaderParameterMacros.h:1684
uint32_t uint32
Definition binka_ue_file_header.h:6
Definition InstanceCullingLoadBalancer.h:17
FIntVector GetWrappedCsGroupCount(TConstArrayView< FPackedBatch > Batches, int32 NumGroupsPerBatch) const
Definition InstanceCullingLoadBalancer.cpp:37
static constexpr uint32 ThreadGroupSize
Definition InstanceCullingLoadBalancer.h:19
FGPUData Upload(FRDGBuilder &GraphBuilder, TConstArrayView< FPackedBatch > Batches, TConstArrayView< FPackedItem > Items, ERDGInitialDataFlags RDGInitialDataFlags, int32 NumGroupsPerBatch) const
Definition InstanceCullingLoadBalancer.cpp:25
static constexpr uint32 NumInstancesItemBits
Definition InstanceCullingLoadBalancer.h:26
FPackedItem PackItem(uint32 InstanceDataOffset, uint32 NumInstances, uint32 Payload, uint32 BatchPrefixSum)
Definition InstanceCullingLoadBalancer.h:50
FPackedBatch PackBatch(uint32 FirstItem, uint32 NumItems)
Definition InstanceCullingLoadBalancer.h:35
static constexpr uint32 PrefixBits
Definition InstanceCullingLoadBalancer.h:22
static constexpr uint32 PrefixBitMask
Definition InstanceCullingLoadBalancer.h:24
static constexpr uint32 NumInstancesItemMask
Definition InstanceCullingLoadBalancer.h:27
static void SetShaderDefines(FShaderCompilerEnvironment &OutEnvironment)
Definition InstanceCullingLoadBalancer.cpp:7
Definition RenderGraphResources.h:1321
Definition RenderGraphBuilder.h:49
ObjectType * AllocObject(TArgs &&... Args)
Definition RenderGraphBuilder.inl:158
Definition RenderGraphEvent.h:38
Definition RenderGraphPass.h:217
Definition Array.h:670
Definition InstanceCullingLoadBalancer.h:118
void Add(uint32 InstanceDataOffset, uint32 NumInstanceDataEntries, uint32 Payload)
Definition InstanceCullingLoadBalancer.h:131
const TArray< FPackedItem, AllocatorType > & GetItems() const
Definition InstanceCullingLoadBalancer.h:224
FGPUData UploadFinalized(FRDGBuilder &GraphBuilder, int32 NumGroupsPerBatch=1) const
Definition InstanceCullingLoadBalancer.h:184
uint32 CurrentBatchFirstItem
Definition InstanceCullingLoadBalancer.h:257
uint32 CurrentBatchPrefixSum
Definition InstanceCullingLoadBalancer.h:254
const TArray< FPackedBatch, AllocatorType > & GetBatches() const
Definition InstanceCullingLoadBalancer.h:218
TRefCountPtr< FData > Data
Definition InstanceCullingLoadBalancer.h:252
uint32 CurrentBatchNumItems
Definition InstanceCullingLoadBalancer.h:255
bool IsEmpty() const
Definition InstanceCullingLoadBalancer.h:166
FIntVector GetWrappedCsGroupCount(int32 NumGroupsPerBatch=1) const
Definition InstanceCullingLoadBalancer.h:213
void ReserveStorage(int32 NumBatches, int32 NumItems)
Definition InstanceCullingLoadBalancer.h:122
void AppendData(const TInstanceCullingLoadBalancer< AllocatorType > &Other)
Definition InstanceCullingLoadBalancer.h:233
uint32 CurrentBatchPackedPrefixSum
Definition InstanceCullingLoadBalancer.h:256
FGPUData Upload(FRDGBuilder &GraphBuilder, int32 NumGroupsPerBatch=1)
Definition InstanceCullingLoadBalancer.h:171
bool HasSingleInstanceItemsOnly() const
Definition InstanceCullingLoadBalancer.h:240
void FinalizeBatches()
Definition InstanceCullingLoadBalancer.h:199
uint32 TotalInstances
Definition InstanceCullingLoadBalancer.h:258
InAllocatorType AllocatorType
Definition InstanceCullingLoadBalancer.h:120
uint32 GetTotalNumInstances() const
Definition InstanceCullingLoadBalancer.h:230
Definition RefCounting.h:454
Definition RefCounting.h:355
Definition Shader.h:1021
FIntVector GetGroupCountWrapped(const int32 TargetGroupCount)
Definition RenderGraphUtils.h:398
FRDGPassRef AddPass(FRDGBuilder &GraphBuilder, FRDGEventName &&PassName, ERDGPassFlags PassFlags, const TShaderRef< TShaderClass > &ComputeShader, const FShaderParametersMetadata *ParametersMetadata, typename TShaderClass::FParameters *Parameters, FIntVector GroupCount)
Definition RenderGraphUtils.h:550
Definition InstanceCullingLoadBalancer.h:79
FIntVector GetWrappedCsGroupCount() const
Definition InstanceCullingLoadBalancer.h:104
FRDGPassRef AddPass(FRDGBuilder &GraphBuilder, FRDGEventName &&PassName, const TShaderRef< TShaderClass > &ComputeShader, typename TShaderClass::FParameters *Parameters) const
Definition InstanceCullingLoadBalancer.h:94
Definition InstanceCullingLoadBalancer.h:31
uint32 FirstItem_NumItems
Definition InstanceCullingLoadBalancer.h:32
Definition InstanceCullingLoadBalancer.h:44
uint32 Payload_BatchPrefixOffset
Definition InstanceCullingLoadBalancer.h:48
uint32 InstanceDataOffset_NumInstances
Definition InstanceCullingLoadBalancer.h:46
Definition ShaderCore.h:544
Definition InstanceCullingLoadBalancer.h:247
TArray< FPackedItem, AllocatorType > Items
Definition InstanceCullingLoadBalancer.h:249
TArray< FPackedBatch, AllocatorType > Batches
Definition InstanceCullingLoadBalancer.h:248