UDocumentation UE5.7 10.02.2026 (Source)
API documentation for Unreal Engine 5.7
GPUWorkGroupLoadBalancer.h
Go to the documentation of this file.
1// Copyright Epic Games, Inc. All Rights Reserved.
2
3#pragma once
4
5#include "CoreMinimal.h"
8#include "RendererInterface.h"
9#include "RenderGraphUtils.h"
10
12
13/*
14 */
16{
17public:
18 static constexpr uint32 ThreadGroupSizeLog2 = 6U;
19 static constexpr uint32 ThreadGroupSize = 1u << ThreadGroupSizeLog2;
21 static constexpr uint32 PrefixBitMask = (1U << PrefixBits) - 1U;
22 static constexpr uint32 NumItemBits = PrefixBits;
23 static constexpr uint32 NumItemMask = (1U << NumItemBits) - 1U;
24 static constexpr uint32 PayLoadBits = 32u - (1 + NumItemBits + PrefixBits);
25
30 {
33 uint32 NumItems; // Note: NumItems = countbits(WorkGroupWorkBoundary)
35 uint32 Payload; // arbitrary 32-bit payload for each workgroup
36 };
37
38 FWorkGroupInfo PackWorkGroupInfo(uint32 FirstItem, uint32 NumItems, uint32 Payload, uint32 CarryOverStartOffset, uint64 WorkGroupWorkBoundary)
39 {
40 checkSlow(NumItems > 0);
41 checkSlow(NumItems - 1 < (1U << NumItemBits));
42 checkSlow(FirstItem < (1U << (32U - NumItemBits)));
43
44 return FWorkGroupInfo{ { uint32(WorkGroupWorkBoundary), uint32(WorkGroupWorkBoundary >> 32u) }, FirstItem, NumItems, CarryOverStartOffset, Payload };
45 }
46
47
52 {
54 };
55
57 {
58 checkSlow(NumChildren > 0);
59 checkSlow(NumChildren - 1 < (1U << NumItemBits));
60 //checkSlow(ParentWorkItemOffset < (1U << (32U - NumItemBits)));
62 // arbitrary per-item payload encoded in as many bits as we happen to have left over
63 checkSlow(Payload < (1U << PayLoadBits));
64 uint32 Packed = (bHasCarryOver ? 1u : 0u)
65 | ((NumChildren - 1u) << 1u)
66 | (BatchPrefixSum << (1u + NumItemBits))
67 | (Payload << (1u + NumItemBits + PrefixBits));
68 return FPackedItem { Packed };
69 }
70
73 SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer< FPackedItem >, ItemBuffer)
75 SHADER_PARAMETER(uint32, NumItems)
77
78 void ReserveStorage(int32 NumBatches, int32 NumItems, int32 NumWork)
79 {
80 WorkGroupInfos.Empty(NumBatches);
81 Items.Empty(NumItems);
82 }
83
87 template <typename PayloadGeneratorType>
88 void Add(PayloadGeneratorType& PayloadGenerator, uint32 NumChildren)
89 {
90 uint32 NumAdded = 0u;
91 while (NumAdded < NumChildren)
92 {
94
96 {
97 const uint32 NumChildrenThisItem = FMath::Min(MaxChildrenThisBatch, NumChildren - NumAdded);
98
99 Items.Add(PackItem(NumAdded != 0u, NumChildrenThisItem, PayloadGenerator.GetItemPayload(), CurrentWorkGroupPrefixSum));
104 }
105
106 // Flush batch if it is not possible to add any more items (for one of the reasons)
108 {
115 }
116 }
118 }
119
120 bool IsEmpty() const
121 {
122 return Items.IsEmpty();
123 }
124
128 template <typename PayloadGeneratorType>
137
139
141
147
148 /*
149 * Publish constants to a shader implementing a kernel using the load balancer.
150 * Call from ModifyCompilationEnvironment
151 */
153
155
156protected:
159
166};
167
168
169/*
170 */
171template <typename ParentInfoType>
173{
174private:
175 struct FPayloadGenerator;
176public:
178
179 void ReserveStorage(int32 NumBatches, int32 NumItems, int32 NumParents)
180 {
181 FGPUWorkGroupLoadBalancer::ReserveStorage(NumBatches, NumItems);
182 ParentInfos.Empty(NumParents);
183 }
184
188 void Add(FParentInfo&& ParentInfo, uint32 NumChildren)
189 {
190 PayloadGenerator.ParentItemOffset = uint32(ParentInfos.Num());
191 ParentInfos.Emplace(MoveTemp(ParentInfo));
192 FGPUWorkGroupLoadBalancer::Add(PayloadGenerator, NumChildren);
193 }
194
200
202 {
204
205 FRDGBufferRef ParentInfosRDG = CreateStructuredBuffer(GraphBuilder, TEXT("GPUWorkGroupLoadBalancer.ParentInfos"), [&]() -> auto& { return ParentInfos; });
206 OutShaderParameters.ParentInfoBuffer = GraphBuilder.CreateSRV(ParentInfosRDG);
207 OutShaderParameters.NumParentInfos = ~0u;
208 }
209
215
220 {
222 }
223
224private:
225 struct FPayloadGenerator
226 {
227 uint32 CurrentWorkGroupFirstParentItem = 0u;
228 uint32 ParentItemOffset = 0u;
229
230 uint32 GetItemPayload() const
231 {
232 return ParentItemOffset - CurrentWorkGroupFirstParentItem;
233 }
234
235 uint32 GetWorkGroupPayload()
236 {
237 uint32 Tmp = CurrentWorkGroupFirstParentItem;
238 CurrentWorkGroupFirstParentItem = ParentItemOffset;
239 return Tmp;
240 }
241 };
242
244 FPayloadGenerator PayloadGenerator;
245};
#define checkSlow(expr)
Definition AssertionMacros.h:332
#define TEXT(x)
Definition Platform.h:1272
FPlatformTypes::int32 int32
A 32-bit signed integer.
Definition Platform.h:1125
FPlatformTypes::uint64 uint64
A 64-bit unsigned integer.
Definition Platform.h:1117
UE_FORCEINLINE_HINT TSharedRef< CastToType, Mode > StaticCastSharedRef(TSharedRef< CastFromType, Mode > const &InSharedRef)
Definition SharedPointer.h:127
FRDGBufferRef CreateStructuredBuffer(FRDGBuilder &GraphBuilder, const TCHAR *Name, uint32 BytesPerElement, uint32 NumElements, const void *InitialData, uint64 InitialDataSize, ERDGInitialDataFlags InitialDataFlags)
Definition RenderGraphUtils.cpp:888
#define SHADER_PARAMETER_RDG_BUFFER_SRV(ShaderType, MemberName)
Definition ShaderParameterMacros.h:1800
#define BEGIN_SHADER_PARAMETER_STRUCT(StructTypeName, DllStorage)
Definition ShaderParameterMacros.h:1482
#define SHADER_PARAMETER_STRUCT_INCLUDE(StructType, MemberName)
Definition ShaderParameterMacros.h:1895
#define END_SHADER_PARAMETER_STRUCT()
Definition ShaderParameterMacros.h:1485
#define SHADER_PARAMETER(MemberType, MemberName)
Definition ShaderParameterMacros.h:1684
UE_INTRINSIC_CAST UE_REWRITE constexpr std::remove_reference_t< T > && MoveTemp(T &&Obj) noexcept
Definition UnrealTemplate.h:520
uint32_t uint32
Definition binka_ue_file_header.h:6
Definition GPUWorkGroupLoadBalancer.h:16
uint64 CurrentWorkGroupWorkBoundary
Definition GPUWorkGroupLoadBalancer.h:165
FIntVector GetWrappedCsGroupCount() const
Definition GPUWorkGroupLoadBalancer.cpp:29
uint32 CurrentWorkGroupCarryOver
Definition GPUWorkGroupLoadBalancer.h:164
void Add(PayloadGeneratorType &PayloadGenerator, uint32 NumChildren)
Definition GPUWorkGroupLoadBalancer.h:88
static constexpr uint32 NumItemMask
Definition GPUWorkGroupLoadBalancer.h:23
static constexpr uint32 PayLoadBits
Definition GPUWorkGroupLoadBalancer.h:24
FWorkGroupInfo PackWorkGroupInfo(uint32 FirstItem, uint32 NumItems, uint32 Payload, uint32 CarryOverStartOffset, uint64 WorkGroupWorkBoundary)
Definition GPUWorkGroupLoadBalancer.h:38
uint32 CurrentWorkGroupFirstItem
Definition GPUWorkGroupLoadBalancer.h:162
TArray< FPackedItem, SceneRenderingAllocator > Items
Definition GPUWorkGroupLoadBalancer.h:158
uint32 CurrentWorkGroupPrefixSum
Definition GPUWorkGroupLoadBalancer.h:160
void FinalizeBatches(PayloadGeneratorType &PayloadGenerator)
Definition GPUWorkGroupLoadBalancer.h:129
uint32 CurrentWorkGroupNumItems
Definition GPUWorkGroupLoadBalancer.h:161
uint32 TotalChildren
Definition GPUWorkGroupLoadBalancer.h:163
void FinalizeParametersAsync(FShaderParameters &OutShaderParameters)
Definition GPUWorkGroupLoadBalancer.cpp:22
bool IsEmpty() const
Definition GPUWorkGroupLoadBalancer.h:120
FPackedItem PackItem(bool bHasCarryOver, uint32 NumChildren, uint32 Payload, uint32 BatchPrefixSum)
Definition GPUWorkGroupLoadBalancer.h:56
uint32 GetTotalChildren() const
Definition GPUWorkGroupLoadBalancer.h:154
TArray< FWorkGroupInfo, SceneRenderingAllocator > WorkGroupInfos
Definition GPUWorkGroupLoadBalancer.h:157
void ReserveStorage(int32 NumBatches, int32 NumItems, int32 NumWork)
Definition GPUWorkGroupLoadBalancer.h:78
void GetParametersAsync(FRDGBuilder &GraphBuilder, FShaderParameters &OutShaderParameters)
Definition GPUWorkGroupLoadBalancer.cpp:6
static constexpr uint32 PrefixBits
Definition GPUWorkGroupLoadBalancer.h:20
static constexpr uint32 ThreadGroupSizeLog2
Definition GPUWorkGroupLoadBalancer.h:18
static constexpr uint32 ThreadGroupSize
Definition GPUWorkGroupLoadBalancer.h:19
static constexpr uint32 PrefixBitMask
Definition GPUWorkGroupLoadBalancer.h:21
static void SetShaderDefines(FShaderCompilerEnvironment &OutEnvironment)
Definition GPUWorkGroupLoadBalancer.cpp:34
static constexpr uint32 NumItemBits
Definition GPUWorkGroupLoadBalancer.h:22
Definition RenderGraphResources.h:1321
Definition RenderGraphBuilder.h:49
Definition Array.h:670
Definition GPUWorkGroupLoadBalancer.h:173
ParentInfoType FParentInfo
Definition GPUWorkGroupLoadBalancer.h:177
void Add(FParentInfo &&ParentInfo, uint32 NumChildren)
Definition GPUWorkGroupLoadBalancer.h:188
void ReserveStorage(int32 NumBatches, int32 NumItems, int32 NumParents)
Definition GPUWorkGroupLoadBalancer.h:179
void GetParametersAsync(FRDGBuilder &GraphBuilder, FShaderParameters &OutShaderParameters)
Definition GPUWorkGroupLoadBalancer.h:201
void FinalizeBatches()
Definition GPUWorkGroupLoadBalancer.h:219
void FinalizeParametersAsync(FShaderParameters &OutShaderParameters)
Definition GPUWorkGroupLoadBalancer.h:210
Definition GPUWorkGroupLoadBalancer.h:52
uint32 Packed
Definition GPUWorkGroupLoadBalancer.h:53
Definition GPUWorkGroupLoadBalancer.h:30
uint32 Payload
Definition GPUWorkGroupLoadBalancer.h:35
uint32 NumItems
Definition GPUWorkGroupLoadBalancer.h:33
FUint32Point WorkGroupWorkBoundary
Definition GPUWorkGroupLoadBalancer.h:31
uint32 CarryOverStartOffset
Definition GPUWorkGroupLoadBalancer.h:34
uint32 FirstItem
Definition GPUWorkGroupLoadBalancer.h:32
Definition ShaderCore.h:544