UDocumentation UE5.7 10.02.2026 (Source)
API documentation for Unreal Engine 5.7
rrbits.h
Go to the documentation of this file.
1// Copyright Epic Games Tools, LLC. All Rights Reserved.
2// This source file is licensed solely to users who have
3// accepted a valid Unreal Engine license agreement
4// (see e.g., https://www.unrealengine.com/eula), and use
5// of this source file is governed by such agreement.
6
7#ifndef __RADRR_BITSH__
8#define __RADRR_BITSH__
9
10#include "rrCore.h"
11
12//===================================================================================
13// Bit manipulation tools
14
15// Count leading zeros / count trailing zeros. All of these are undefined for input
16// arguments of 0. On x86, BSF/BSR have undefined results for x=0; on ARM and PPC which
17// provide "count leading zeros" but not "count trailing zeros", it's much easier to
18// give a version of CTZ that is correct only for x != 0. These functions are interesting
19// because they're fast, so try to be fast.
20//
21// Put the prototypes here for quick reference:
22
23static U32 rrClz32(U32 val); // count leading zero bits of U32 (val != 0)
24static U32 rrClz64(U64 val); // count leading zero bits of U64 (val != 0)
25static U32 rrCtz32(U32 val); // count trailing zero bits of U32 (val != 0)
26static U32 rrCtz64(U64 val); // count trailing zero bits of U64 (val != 0)
27
28static U32 rrClzBytes32(U32 val); // count leading zero bytes of U32 (val != 0)
29static U32 rrClzBytes64(U64 val); // count leading zero bytes of U64 (val != 0)
30static U32 rrCtzBytes32(U32 val); // count trailing zero bytes of U32 (val != 0)
31static U32 rrCtzBytes64(U64 val); // count trailing zero bytes of U64 (val != 0)
32
33// Generic bit manipulation helpers. These are standard helpers that have dedicated
34// instructions on x86 CPUs with BMI1 support, but their regular expansions are fast
35// everywhere, and these functions are provided mainly for readability.
36//
37// Distinct names for 32- and 64-bit versions to make overload resolution clear since
38// these are sometimes used on signed int types.
39
40// Mask of lowest set bit in val. Return 0 if val=0, else a value with a single bit
41// set.
42static RADINLINE U32 rrLowestSetBitMask32(U32 val) { return val & (0 - val); }
43static RADINLINE U64 rrLowestSetBitMask64(U64 val) { return val & (0 - val); }
44
45// Mask up to and including the lowest set bit in val. Returns 0 if val=0.
46static RADINLINE U32 rrMaskThroughToLowestSet32(U32 val) { return val ^ (val - 1); }
47static RADINLINE U64 rrMaskThroughToLowestSet64(U64 val) { return val ^ (val - 1); }
48
49// Clears the lowest set bit in val. Returns 0 if val=0.
50static RADINLINE U32 rrClearLowestSetBit32(U32 val) { return val & (val - 1); }
51static RADINLINE U64 rrClearLowestSetBit64(U64 val) { return val & (val - 1); }
52
53#if defined(__GNUC__) || defined(__clang__)
54
55 // GCC-esque compilers just provide these built-ins everywhere.
56 static RADINLINE U32 rrClz32(U32 val) { return __builtin_clz(val); }
57 static RADINLINE U32 rrClz64(U64 val) { return __builtin_clzll(val); }
58
59 static RADINLINE U32 rrCtz32(U32 val) { return __builtin_ctz(val); }
60 static RADINLINE U32 rrCtz64(U64 val) { return __builtin_ctzll(val); }
61
62 #define SYNTHESIZE_BYTE_FUNCS
63
64#elif defined(_MSC_VER)
65
66 #if defined(__RADARM__) && defined(_WIN32_WCE)
67
68 // Don't have CLZ or anything similar here, use fall-back.
69 #define SYNTHESIZE_ALL
70
71 #elif defined(__RADARM64__) // needs to come before __RADARM__, we set both and MSVC changes intrinsic names for AArch64
72
73 #include <intrin.h>
74
75 static RADINLINE U32 rrClz32(U32 val) { return _CountLeadingZeros(val); }
76 static RADINLINE U32 rrClz64(U64 val) { return _CountLeadingZeros64(val); }
77
78 // Strategy for CTZ: "x & -x" isolates least-significant set bit, then use
79 // CLZ to infer trailing zero count.
80 static RADINLINE U32 rrCtz32(U32 val) { return 31 - rrClz32(val & (0u - val)); }
81 static RADINLINE U32 rrCtz64(U64 val) { return 63 - rrClz64(val & (0ull - val)); }
82
83 #define SYNTHESIZE_BYTE_FUNCS
84
85 #elif defined(__RADARM__)
86
87 #include <intrin.h>
88
89 static RADINLINE U32 rrClz32(U32 val) { return _arm_clz(val); }
90 static RADINLINE U32 rrClz64(U64 val) { U32 hi = (U32) (val >> 32); return hi ? rrClz32(hi) : 32 + rrClz32((U32) val); }
91
92 // Strategy for CTZ: "x & -x" isolates least-significant set bit, then use
93 // CLZ to infer trailing zero count.
94 static RADINLINE U32 rrCtz32(U32 val) { return 31 - rrClz32(val & (0u - val)); }
95 static RADINLINE U32 rrCtz64(U64 val) { return 63 - rrClz64(val & (0ull - val)); }
96
97 #define SYNTHESIZE_BYTE_FUNCS
98
99 #elif defined(__RADPPC__)
100
101 #include <PPCIntrinsics.h>
102
103 static RADINLINE U32 rrClz32(U32 val) { return _CountLeadingZeros(val); }
104 static RADINLINE U32 rrClz64(U64 val) { return _CountLeadingZeros64(val); }
105
106 // Strategy for CTZ: "x & -x" isolates least-significant set bit, then use
107 // CLZ to infer trailing zero count.
108 static RADINLINE U32 rrCtz32(U32 val) { return 31 - rrClz32(val & (0u - val)); }
109 static RADINLINE U32 rrCtz64(U64 val) { return 63 - rrClz64(val & (0ull - val)); }
110
111 #define SYNTHESIZE_BYTE_FUNCS
112
113 #elif defined(__RADX64__) && (defined(__RADJAGUAR__) || defined(__AVX2__)) // NOTE(fg): __AVX2__ set by compiler. TUs compiling with -mavx2 or /arch:AVX2 know that LZCNT/TZCNT are available
114
115 #include <immintrin.h>
116
117 static RADINLINE U32 rrClz32(U32 val) { return _lzcnt_u32(val); }
118 static RADINLINE U32 rrClz64(U64 val) { return (U32) _lzcnt_u64(val); }
119
120 static RADINLINE U32 rrCtz32(U32 val) { return _tzcnt_u32(val); }
121 static RADINLINE U32 rrCtz64(U64 val) { return (U32) _tzcnt_u64(val); }
122
123 #define SYNTHESIZE_BYTE_FUNCS
124
125 #elif defined(__RADX64__)
126
127 #include <intrin.h>
128
129 static RADINLINE U32 rrClz32(U32 val) { unsigned long idx; _BitScanReverse(&idx, val); return 31 - idx; }
130 static RADINLINE U32 rrClz64(U64 val) { unsigned long idx; _BitScanReverse64(&idx, val); return 63 - idx; }
131
132 static RADINLINE U32 rrCtz32(U32 val) { unsigned long idx; _BitScanForward(&idx, val); return idx; }
133 static RADINLINE U32 rrCtz64(U64 val) { unsigned long idx; _BitScanForward64(&idx, val); return idx; }
134
135 #define SYNTHESIZE_BYTE_FUNCS
136
137 #elif defined(__RADX86__)
138
139 #include <intrin.h>
140
141 static RADINLINE U32 rrClz32(U32 val) { unsigned long idx; _BitScanReverse(&idx, val); return 31 - idx; }
142 static RADINLINE U32 rrClz64(U64 val) { U32 hi = (U32) (val >> 32); return hi ? rrClz32(hi) : 32 + rrClz32((U32) val); }
143
144 static RADINLINE U32 rrCtz32(U32 val) { unsigned long idx; _BitScanForward(&idx, val); return idx; }
145 static RADINLINE U32 rrCtz64(U64 val) { U32 lo = (U32) val; return lo ? rrCtz32(lo) : 32 + rrCtz32((U32) (val >> 32)); }
146
147 #define SYNTHESIZE_BYTE_FUNCS
148
149 #else
150
151 #error Unknown MSVC target
152
153 #endif
154
155#else
156
157 #error Implement rrBits for this target
158
159#endif
160
161#ifdef SYNTHESIZE_BYTE_FUNCS // Byte funcs from bit funcs
162
163// Count leading/trailing zero bytes
164// Same as the bit funcs, behavior for val=0 is not specified!
165static RADINLINE U32 rrClzBytes32(U32 val) { return rrClz32(val) >> 3; }
166static RADINLINE U32 rrClzBytes64(U64 val) { return rrClz64(val) >> 3; }
167
168static RADINLINE U32 rrCtzBytes32(U32 val) { return rrCtz32(val) >> 3; }
169static RADINLINE U32 rrCtzBytes64(U64 val) { return rrCtz64(val) >> 3; }
170
171#undef SYNTHESIZE_BYTE_FUNCS
172
173#endif // SYNTHESIZE_BYTE_FUNCS
174
175#ifdef SYNTHESIZE_ALL // Full SW fallback.
176
177static RADINLINE U32 rrClz32(U32 val)
178{
179 // 4-clz4(x)
180 static U8 const lut[16] = { 0,1,2,2, 3,3,3,3, 4,4,4,4, 4,4,4,4 };
181
182 U32 nz = 32;
183 if (val & 0xffff0000u) { nz -= 16; val >>= 16; }
184 if (val & 0x0000ff00u) { nz -= 8; val >>= 8; }
185 if (val & 0x000000f0u) { nz -= 4; val >>= 4; }
186 return nz - lut[val & 0xf];
187}
188
189static RADINLINE U32 rrCtz32(U32 val)
190{
191 // ctz4(x)
192 static U8 const lut[16] = { 4,0,1,0, 2,0,1,0, 3,0,1,0, 2,0,1,0 };
193
194 U32 nz = 0;
195 if ((val & 0xffff) == 0) { nz += 16; val >>= 16; }
196 if ((val & 0x00ff) == 0) { nz += 8; val >>= 8; }
197 if ((val & 0x000f) == 0) { nz += 4; val >>= 4; }
198 return nz + lut[val & 0xf];
199}
200
201static RADINLINE U32 rrClz64(U64 val) { U32 hi = (U32) (val >> 32); return hi ? rrClz32(hi) : 32 + rrClz32((U32) val); }
202static RADINLINE U32 rrCtz64(U64 val) { U32 lo = (U32) val; return lo ? rrCtz32(lo) : 32 + rrCtz32((U32) (val >> 32)); }
203
204// Count leading/trailing zero bytes
205// Same as the bit funcs, behavior for val=0 is not specified!
206static RADINLINE U32 rrClzBytes32(U32 val)
207{
208 // Don't get fancy here. Assumes val != 0.
209 if (val & 0xff000000u) return 0;
210 if (val & 0x00ff0000u) return 1;
211 if (val & 0x0000ff00u) return 2;
212 return 3;
213}
214
215static RADINLINE U32 rrCtzBytes32(U32 val)
216{
217 // Don't get fancy here. Assumes val != 0.
218 if (val & 0x000000ffu) return 0;
219 if (val & 0x0000ff00u) return 1;
220 if (val & 0x00ff0000u) return 2;
221 return 3;
222}
223
224static RADINLINE U32 rrClzBytes64(U64 val) { U32 hi = (U32) (val >> 32); return hi ? rrClzBytes32(hi) : 4 + rrClzBytes32((U32) val); }
225static RADINLINE U32 rrCtzBytes64(U64 val) { U32 lo = (U32) val; return lo ? rrCtzBytes32(lo) : 4 + rrCtzBytes32((U32) (val >> 32)); }
226
227#undef SYNTHESIZE_ALL
228
229#endif // SYNTHESIZE_ALL
230
231#endif // __RADRR_BITSH__
RAD_U32 U32
Definition egttypes.h:501
RAD_U8 U8
Definition egttypes.h:481
RAD_U64 U64
Definition egttypes.h:511
#define RADINLINE
Definition egttypes.h:387
UE_FORCEINLINE_HINT TSharedRef< CastToType, Mode > StaticCastSharedRef(TSharedRef< CastFromType, Mode > const &InSharedRef)
Definition SharedPointer.h:127