UDocumentation UE5.7 10.02.2026 (Source)
API documentation for Unreal Engine 5.7
VectorVMPlatformGeneric.h
Go to the documentation of this file.
1// Copyright Epic Games, Inc. All Rights Reserved.
2
3#pragma once
4
6
7#if PLATFORM_CPU_X86_FAMILY || defined(__SSE3__)
8
// Byte-shuffle wrapper: expands to _mm_shuffle_epi8 with the same two arguments, in the same order.
// NOTE(review): Intel documents _mm_shuffle_epi8 as an SSSE3 intrinsic, while the enclosing
// guard tests __SSE3__ -- confirm the guard is sufficient for every target that reaches this path.
9#define VVM_pshufb(Src, Mask) _mm_shuffle_epi8(Src, Mask)
10 // Fabian's round-to-nearest-even float to half
// Float-to-half conversion with round-to-nearest-even, operating on 128-bit SSE registers.
//
// Parameters:
//   output - destination pointer (untyped); the store through it is not visible in this listing.
//   input  - source pointer to float data; the load from it is not visible in this listing.
//
// NOTE(review): this listing is incomplete. The embedded line numbers skip several statements,
// so the definitions of `absf`, `f16max` and `round1`, the blend of the normal/subnormal paths,
// the merge of special values and the final store are not shown here. The comments below only
// describe the statements that are present.
11static void VVM_floatToHalf(void* output, float const* input)
12{
 // 16-byte aligned constant tables, each holding the same value in all four 32-bit lanes.
 // mask_sign: bit 31 of every lane (the IEEE-754 binary32 sign bit position).
13 static const MS_ALIGN(16) unsigned int mask_sign[4] GCC_ALIGN(16) = { 0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u };
14 static const MS_ALIGN(16) int c_f16max[4] GCC_ALIGN(16) = { (127 + 16) << 23, (127 + 16) << 23, (127 + 16) << 23, (127 + 16) << 23 }; // all FP32 values >=this round to +inf
 // c_nanbit: bit 9 set in every lane; presumably the quiet-NaN mantissa bit of the FP16 result -- its use is not visible here.
15 static const MS_ALIGN(16) int c_nanbit[4] GCC_ALIGN(16) = { 0x200, 0x200, 0x200, 0x200 };
 // c_infty_as_fp16: 0x7c00 is the FP16 bit pattern with all exponent bits set and a zero mantissa.
16 static const MS_ALIGN(16) int c_infty_as_fp16[4] GCC_ALIGN(16) = { 0x7c00, 0x7c00, 0x7c00, 0x7c00 };
17 static const MS_ALIGN(16) int c_min_normal[4] GCC_ALIGN(16) = { (127 - 14) << 23, (127 - 14) << 23, (127 - 14) << 23, (127 - 14) << 23 }; // smallest FP32 that yields a normalized FP16
 // c_subnorm_magic: FP32 bit pattern with biased exponent (127 - 15) + (23 - 10) + 1 and zero mantissa; added as a float in the subnormal path below.
18 static const MS_ALIGN(16) int c_subnorm_magic[4] GCC_ALIGN(16) = { ((127 - 15) + (23 - 10) + 1) << 23, ((127 - 15) + (23 - 10) + 1) << 23, ((127 - 15) + (23 - 10) + 1) << 23, ((127 - 15) + (23 - 10) + 1) << 23 };
19 static const MS_ALIGN(16) int c_normal_bias[4] GCC_ALIGN(16) = { 0xfff - ((127 - 15) << 23), 0xfff - ((127 - 15) << 23), 0xfff - ((127 - 15) << 23), 0xfff - ((127 - 15) << 23) }; // adjust exponent and add mantissa rounding
20
21
 // `absf` is defined on a line that is not part of this listing; here it is reinterpreted as integer lanes.
26 __m128i absf_int = _mm_castps_si128(absf); // the cast is "free" (extra bypass latency, but no throughput hit)
 // An unordered self-compare yields an all-ones lane only where the lane is NaN.
28 __m128 b_isnan = _mm_cmpunord_ps(absf, absf); // is this a NaN?
 // Signed 32-bit compare: a lane is all-ones where f16max > absf_int. `f16max` is defined outside this listing.
29 __m128i b_isregular = _mm_cmpgt_epi32(f16max, absf_int); // (sub)normalized or special?
32
35
36 // "result is subnormal" path
37 __m128 subnorm1 = _mm_add_ps(absf, _mm_castsi128_ps(*(VectorRegister4i*)c_subnorm_magic)); // magic value to round output mantissa
39
40 // "result is normal" path
 // Left shift by 18 moves bit 13 of each lane into bit 31; the arithmetic right shift by 31 then
 // replicates that bit across the lane, giving -1 or 0.
41 __m128i mantoddbit = _mm_slli_epi32(absf_int, 31 - 13); // shift bit 13 (mantissa LSB) to sign
42 __m128i mantodd = _mm_srai_epi32(mantoddbit, 31); // -1 if FP16 mantissa odd, else 0
43
 // Subtracting -1 adds one to the lane; subtracting 0 leaves it unchanged. `round1` is defined outside this listing.
45 __m128i round2 = _mm_sub_epi32(round1, mantodd); // if mantissa LSB odd, bias towards rounding up (RTNE)
 // Logical right shift by 13 discards the low 13 bits of each lane.
46 __m128i normal = _mm_srli_epi32(round2, 13); // rounded result
47
48 // combine the two non-specials
50
51 // merge in specials as well
53
56
59}
60
// Per-lane logical right shift: each of the four 32-bit lanes of v0 is shifted right by the
// count held in the matching lane of v1, and the packed result is returned.
// NOTE(review): the signature line and the declaration of `res` are not part of this listing;
// `res` is used as an object exposing a uint32 array `u4` and a vector view `v`.
62{
 // View both operands as arrays of four unsigned 32-bit lanes.
63 uint32* v0_4 = (uint32*)&v0;
64 uint32* v1_4 = (uint32*)&v1;
65
67
 // NOTE(review): shifting a 32-bit value by a count >= 32 is undefined behavior; nothing here
 // clamps or masks the counts -- confirm callers guarantee they are in range.
68 res.u4[0] = v0_4[0] >> v1_4[0];
69 res.u4[1] = v0_4[1] >> v1_4[1];
70 res.u4[2] = v0_4[2] >> v1_4[2];
71 res.u4[3] = v0_4[3] >> v1_4[3];
72
73 return res.v;
74}
75
// Per-lane left shift: each of the four 32-bit lanes of v0 is shifted left by the count held
// in the matching lane of v1, and the packed result is returned.
// NOTE(review): the signature line and the declaration of `res` are not part of this listing;
// `res` is used as an object exposing a uint32 array `u4` and a vector view `v`.
77{
 // View both operands as arrays of four unsigned 32-bit lanes.
78 uint32* v0_4 = (uint32*)&v0;
79 uint32* v1_4 = (uint32*)&v1;
80
82
 // NOTE(review): shifting a 32-bit value by a count >= 32 is undefined behavior; nothing here
 // clamps or masks the counts -- confirm callers guarantee they are in range.
83 res.u4[0] = v0_4[0] << v1_4[0];
84 res.u4[1] = v0_4[1] << v1_4[1];
85 res.u4[2] = v0_4[2] << v1_4[2];
86 res.u4[3] = v0_4[3] << v1_4[3];
87
88 return res.v;
89}
90
91#endif // PLATFORM_CPU_X86_FAMILY || defined(__SSE3__)
#define GCC_ALIGN(n)
Definition AndroidPlatform.h:163
#define MS_ALIGN(n)
Definition Platform.h:916
UE_FORCEINLINE_HINT TSharedRef< CastToType, Mode > StaticCastSharedRef(TSharedRef< CastFromType, Mode > const &InSharedRef)
Definition SharedPointer.h:127
#define VM_FORCEINLINE
Definition VectorVM.h:15
uint32_t uint32
Definition binka_ue_file_header.h:6
Definition VectorVMPlatformBase.h:9
uint32 u4[4]
Definition VectorVMPlatformBase.h:13
Definition UnrealMathFPU.h:28