UDocumentation UE5.7 10.02.2026 (Source)
API documentation for Unreal Engine 5.7
Simd4.h
Go to the documentation of this file.
1// Copyright Epic Games, Inc. All Rights Reserved.
2#pragma once
3
4#include "Chaos/SimdTypes.h"
5
6namespace Chaos
7{
8 namespace Private
9 {
10 // WIP: Not ready for public use. Keep in the Private namespace for now...
11 // NOTE: the code below only works with TNumLanes == 4 for now
12
14 //
15 // 4-wide float typedefs
16 //
18
23
25 //
26 // 4-wide template specializations
27 //
29
30 template<>
37
38 template<>
45
46 template<>
53
54 template<>
61
62 template<>
64 {
65 TSimdRealf<4> Out;
67 return Out;
68 }
69
70 template<>
77
78 template<>
85
86
87#if 0
88 // Failed experiment!
89 template<>
91 {
96
97 // Load V0 and V1 into the component vectors in first and second lanes
102
103 // Load V2 into the component vectors in the third lane
108
109 // Load V3 into the component vectors in the fourth lane
114
115 TSimdVec3f<4> Out;
116 VectorStoreAligned(X, Out.VX);
117 VectorStoreAligned(Y, Out.VY);
118 VectorStoreAligned(Z, Out.VZ);
119 return Out;
120 }
121#endif
122
124 //
125 // 4-wide Logical operations
126 //
128
130 {
132 return VectorMaskBits(L) != 0;
133 }
134
136 {
138 return ((VectorMaskBits(L) & 0xF) == 0xF);
139 }
140
151
163
175
187
199
212
224
237
249
261
273
286
288 //
289 // 4-wide Math operations
290 //
292
294 {
296
298
299 FSimd4Realf Out;
300 VectorStoreAligned(Neg, Out.V);
301 return Out;
302 }
303
305 {
308
309 VectorRegister4f Sum = VectorAdd(L, R);
310
311 FSimd4Realf Out;
312 VectorStoreAligned(Sum, Out.V);
313 return Out;
314 }
315
332
334 {
337
339
340 FSimd4Realf Out;
341 VectorStoreAligned(Diff, Out.V);
342 return Out;
343 }
344
361
363 {
366
367 VectorRegister4f Product = VectorMultiply(L, R);
368
369 FSimd4Realf Out;
370 VectorStoreAligned(Product, Out.V);
371 return Out;
372 }
373
387
389 {
390 return SimdMultiply(R, L);
391 }
392
409
430
432 {
435
436 VectorRegister4f Ratio = VectorDivide(L, R);
437
438 FSimd4Realf Out;
439 VectorStoreAligned(Ratio, Out.V);
440 return Out;
441 }
442
459
478
480 {
482
483 VectorRegister4f Product = VectorMultiply(V, V);
484
485 FSimd4Realf Out;
486 VectorStoreAligned(Product, Out.V);
487 return Out;
488 }
489
491 {
493
495
496 FSimd4Realf Out;
498 return Out;
499 }
500
511
523
535
537 //
538 // 4-wide Gather/Scatter operations
539 //
541
542 // Convert 4 row-vectors into 3 column-vectors
543 // NOTE: The input vectors must be 16-byte aligned and padded to 16 bytes to avoid reading past valid memory
545 {
546 VectorRegister4f A = VectorLoadAligned(&InA.X); // Ax Ay Az Aw
547 VectorRegister4f B = VectorLoadAligned(&InB.X); // Bx By Bz Bw
548 VectorRegister4f C = VectorLoadAligned(&InC.X); // Cx Cy Cz Cw
549 VectorRegister4f D = VectorLoadAligned(&InD.X); // Dx Dy Dz Dw
550
551 // This can be done with fewer registers, but the compiler should figure that out,
552 // and it's much easier to follow when left like this...
553 VectorRegister4f P = VectorUnpackLo(A, C); // Ax Cx Ay Cy
554 VectorRegister4f Q = VectorUnpackLo(B, D); // Bx Dx By Dy
555 VectorRegister4f R = VectorUnpackHi(A, C); // Az Cz Aw Cw
556 VectorRegister4f S = VectorUnpackHi(B, D); // Bz Dz Bw Dw
557
558 VectorRegister4f X = VectorUnpackLo(P, Q); // Ax Bx Cx Dx
559 VectorRegister4f Y = VectorUnpackHi(P, Q); // Ay By Cy Dy
560 VectorRegister4f Z = VectorUnpackLo(R, S); // Az Bz Cz Dz
561
562 FSimd4Vec3f Out;
563 VectorStoreAligned(X, Out.VX);
564 VectorStoreAligned(Y, Out.VY);
565 VectorStoreAligned(Z, Out.VZ);
566 return Out;
567 }
568
569 }
570}
#define FORCEINLINE
Definition AndroidPlatform.h:140
FPlatformTypes::int32 int32
A 32-bit signed integer.
Definition Platform.h:1125
UE_FORCEINLINE_HINT TSharedRef< CastToType, Mode > StaticCastSharedRef(TSharedRef< CastFromType, Mode > const &InSharedRef)
Definition SharedPointer.h:127
FORCEINLINE VectorRegister4Int MakeVectorRegisterInt(int32 X, int32 Y, int32 Z, int32 W)
Definition UnrealMathFPU.h:282
FORCEINLINE VectorRegister4Float VectorSubtract(const VectorRegister4Float &Vec1, const VectorRegister4Float &Vec2)
Definition UnrealMathFPU.h:731
FORCEINLINE VectorRegister4Double VectorLoadFloat3(const double *Ptr)
Definition UnrealMathFPU.h:427
FORCEINLINE VectorRegister4Float VectorSqrt(const VectorRegister4Float &Vec)
Definition UnrealMathFPU.h:1263
FORCEINLINE VectorRegister4Float VectorReciprocalSqrt(const VectorRegister4Float &Vec)
Definition UnrealMathFPU.h:1279
FORCEINLINE VectorRegister4Float VectorMin(const VectorRegister4Float &Vec1, const VectorRegister4Float &Vec2)
Definition UnrealMathFPU.h:1686
FORCEINLINE VectorRegister4Float MakeVectorRegister(uint32 X, uint32 Y, uint32 Z, uint32 W)
Definition UnrealMathFPU.h:195
FORCEINLINE void VectorIntStoreAligned(const VectorRegister4Int &A, const void *Ptr)
Definition UnrealMathFPU.h:2578
FORCEINLINE VectorRegister4Float VectorSetFloat1(float F)
Definition UnrealMathFPU.h:518
FORCEINLINE VectorRegister4Float VectorDivide(const VectorRegister4Float &Vec1, const VectorRegister4Float &Vec2)
Definition UnrealMathFPU.h:834
FORCEINLINE VectorRegister4Float VectorMultiply(const VectorRegister4Float &Vec1, const VectorRegister4Float &Vec2)
Definition UnrealMathFPU.h:758
FORCEINLINE VectorRegister4Float VectorMax(const VectorRegister4Float &Vec1, const VectorRegister4Float &Vec2)
Definition UnrealMathFPU.h:1713
FORCEINLINE VectorRegister4Float VectorBitwiseAnd(const VectorRegister4Float &Vec1, const VectorRegister4Float &Vec2)
Definition UnrealMathFPU.h:1165
FORCEINLINE VectorRegister4Int VectorIntCompareGE(const VectorRegister4Int &A, const VectorRegister4Int &B)
Definition UnrealMathFPU.h:2392
VectorRegister4Float VectorLoadAligned(const float *Ptr)
Definition UnrealMathFPU.h:451
FORCEINLINE VectorRegister4Float VectorMultiplyAdd(const VectorRegister4Float &Vec1, const VectorRegister4Float &Vec2, const VectorRegister4Float &Vec3)
Definition UnrealMathFPU.h:786
FORCEINLINE VectorRegister4Float VectorSelect(const VectorRegister4Float &Mask, const VectorRegister4Float &Vec1, const VectorRegister4Float &Vec2)
Definition UnrealMathFPU.h:1105
FORCEINLINE VectorRegister4Float VectorCompareGT(const VectorRegister4Float &Vec1, const VectorRegister4Float &Vec2)
Definition UnrealMathFPU.h:974
FORCEINLINE VectorRegister4Float VectorCompareGE(const VectorRegister4Float &Vec1, const VectorRegister4Float &Vec2)
Definition UnrealMathFPU.h:1000
FORCEINLINE VectorRegister4Float VectorCompareLT(const VectorRegister4Float &Vec1, const VectorRegister4Float &Vec2)
Definition UnrealMathFPU.h:1025
FORCEINLINE int32 VectorMaskBits(const VectorRegister4Float &Vec1)
Definition UnrealMathFPU.h:1075
FORCEINLINE VectorRegister4Float VectorNegate(const VectorRegister4Float &Vec)
Definition UnrealMathFPU.h:687
FORCEINLINE VectorRegister4Float VectorAdd(const VectorRegister4Float &Vec1, const VectorRegister4Float &Vec2)
Definition UnrealMathFPU.h:704
void VectorStoreAligned(const VectorRegister4Float &Vec, float *Ptr)
Definition UnrealMathFPU.h:534
FORCEINLINE VectorRegister4Float VectorOneFloat(void)
Definition UnrealMathFPU.h:346
FORCEINLINE VectorRegister4Float VectorZeroFloat(void)
Definition UnrealMathFPU.h:331
FORCEINLINE VectorRegister4Int VectorIntLoadAligned(const void *Ptr)
Definition UnrealMathFPU.h:2593
FORCEINLINE VectorRegister4Float VectorBitwiseOr(const VectorRegister4Float &Vec1, const VectorRegister4Float &Vec2)
Definition UnrealMathFPU.h:1140
FORCEINLINE VectorRegister4Float VectorCompareEQ(const VectorRegister4Float &Vec1, const VectorRegister4Float &Vec2)
Definition UnrealMathFPU.h:923
FORCEINLINE VectorRegister4Int VectorIntCompareLT(const VectorRegister4Int &A, const VectorRegister4Int &B)
Definition UnrealMathFPU.h:2383
#define VectorReplicate(Vec, ElementIndex)
Definition UnrealMathFPU.h:627
FORCEINLINE VectorRegister4Int VectorIntCompareGT(const VectorRegister4Int &A, const VectorRegister4Int &B)
Definition UnrealMathFPU.h:2374
FORCEINLINE VectorRegister4Float VectorCompareNE(const VectorRegister4Float &Vec1, const VectorRegister4Float &Vec2)
Definition UnrealMathFPU.h:948
FORCEINLINE VectorRegister4Float VectorBitwiseNotAnd(const VectorRegister4Float &A, const VectorRegister4Float &B)
Definition VectorUtility.h:289
FORCEINLINE VectorRegister4Float VectorUnpackHi(const VectorRegister4Float &A, const VectorRegister4Float &B)
Definition VectorUtility.h:159
FORCEINLINE VectorRegister4Float VectorUnpackLo(const VectorRegister4Float &A, const VectorRegister4Float &B)
Definition VectorUtility.h:112
uint32_t uint32
Definition binka_ue_file_header.h:6
Definition Vector.h:407
FORCEINLINE bool SimdAllTrue(const FSimd4Selector &InL)
Definition Simd4.h:135
FORCEINLINE FSimd4Realf SimdMax(const FSimd4Realf &InL, const FSimd4Realf &InR)
Definition Simd4.h:524
FORCEINLINE FSimd4Realf SimdSquare(const FSimd4Realf &InV)
Definition Simd4.h:479
FORCEINLINE FSimd4Selector SimdEqual(const FSimd4Realf &InL, const FSimd4Realf &InR)
Definition Simd4.h:176
FORCEINLINE FSimd4Realf SimdMultiply(const FSimd4Realf &InL, const FSimd4Realf &InR)
Definition Simd4.h:362
FORCEINLINE FSimd4Vec3f SimdMultiplyAdd(const FSimd4Vec3f &L, const FSimd4Vec3f &R, const FSimd4Vec3f &Acc)
Definition Simd4.h:410
FORCEINLINE FSimd4Selector SimdLess(const FSimd4Int32 &InL, const FSimd4Int32 &InR)
Definition Simd4.h:250
FORCEINLINE FSimd4Realf SimdDivide(const FSimd4Realf &InL, const FSimd4Realf &InR)
Definition Simd4.h:431
FORCEINLINE FSimd4Realf SimdMin(const FSimd4Realf &InL, const FSimd4Realf &InR)
Definition Simd4.h:512
FORCEINLINE FSimd4Realf SimdDotProduct(const FSimd4Vec3f &L, const FSimd4Vec3f &R)
Definition Simd4.h:460
FORCEINLINE FSimd4Selector SimdNotEqual(const FSimd4Realf &InL, const FSimd4Realf &InR)
Definition Simd4.h:188
FORCEINLINE FSimd4Selector SimdAnd(const FSimd4Selector &InL, const FSimd4Selector &InR)
Definition Simd4.h:164
FORCEINLINE FSimd4Realf SimdSubtract(const FSimd4Realf &InL, const FSimd4Realf &InR)
Definition Simd4.h:333
FORCEINLINE FSimd4Realf SimdAdd(const FSimd4Realf &InL, const FSimd4Realf &InR)
Definition Simd4.h:304
FORCEINLINE FSimd4Realf SimdSqrt(const FSimd4Realf &InV)
Definition Simd4.h:490
FORCEINLINE FSimd4Selector SimdOr(const FSimd4Selector &InL, const FSimd4Selector &InR)
Definition Simd4.h:152
FORCEINLINE FSimd4Selector SimdGreaterEqual(const FSimd4Int32 &InL, const FSimd4Int32 &InR)
Definition Simd4.h:200
FORCEINLINE FSimd4Selector SimdNot(const FSimd4Selector &InL)
Definition Simd4.h:141
FORCEINLINE FSimd4Realf SimdSelect(const FSimd4Selector &InSelector, const FSimd4Realf &InL, const FSimd4Realf &InR)
Definition Simd4.h:274
FORCEINLINE bool SimdAnyTrue(const FSimd4Selector &InL)
Definition Simd4.h:129
FORCEINLINE FSimd4Selector SimdGreater(const FSimd4Int32 &InL, const FSimd4Int32 &InR)
Definition Simd4.h:225
FORCEINLINE FSimd4Realf SimdInvSqrt(const FSimd4Realf &InV)
Definition Simd4.h:501
FORCEINLINE FSimd4Realf SimdNegate(const FSimd4Realf &InL)
Definition Simd4.h:293
FORCEINLINE FSimd4Vec3f SimdCrossProduct(const FSimd4Vec3f &L, const FSimd4Vec3f &R)
Definition Simd4.h:443
FORCEINLINE FSimd4Vec3f SimdGatherAligned(const FVec3f &InA, const FVec3f &InB, const FVec3f &InC, const FVec3f &InD)
Definition Simd4.h:544
Definition SkeletalMeshComponent.h:307
@ Y
Definition SimulationModuleBase.h:153
@ X
Definition SimulationModuleBase.h:152
constexpr VectorRegister4Int IntZero
Definition UnrealMathVectorConstants.h.inl:79
constexpr VectorRegister4Int IntAllMask
Definition UnrealMathVectorConstants.h.inl:110
VectorRegister4Float AllMask()
Definition UnrealMathVectorConstants.h.inl:111
Definition OverriddenPropertySet.cpp:45
Definition SimdTypes.h:127
int32 V[TNumLanes]
Definition SimdTypes.h:128
static TSimdInt32< TNumLanes > Make(const int32 I)
static TSimdInt32< TNumLanes > Zero()
Definition SimdTypes.h:168
static TSimdRealf Make(const float F)
static TSimdRealf Zero()
float V[TNumLanes]
Definition SimdTypes.h:169
static TSimdRealf One()
Definition SimdTypes.h:75
static TSimdSelector< TNumLanes > True()
static TSimdSelector< TNumLanes > False()
float V[TNumLanes]
Definition SimdTypes.h:76
Definition SimdTypes.h:204
float VZ[TNumLanes]
Definition SimdTypes.h:209
float VX[TNumLanes]
Definition SimdTypes.h:207
float VY[TNumLanes]
Definition SimdTypes.h:208
static FORCEINLINE TSimdVec3f Make(const FVec3f &V)
Definition SimdTypes.h:237
Definition UnrealMathFPU.h:20
Definition UnrealMathFPU.h:28
int32 V[4]
Definition UnrealMathFPU.h:29