UDocumentation UE5.7 10.02.2026 (Source)
API documentation for Unreal Engine 5.7
PM-k.h
Go to the documentation of this file.
1// ================================================================================
2// Software Name: pm4-bitap.c
3// Version: V1.0
4// URL: https://www.genivia.com/files/BSD-3.txt
5// ===========================================================================================
6// BSD 3-Clause License
7//
8// Copyright (c) 2023, Robert van Engelen, Genivia Inc.
9// All rights reserved.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are met:
13//
14// 1. Redistributions of source code must retain the above copyright notice, this
15// list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright notice,
18// this list of conditions and the following disclaimer in the documentation
19// and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the copyright holder nor the names of its
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
28// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
29// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
31// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
33// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35
36#pragma once
37#include "CoreTypes.h"
38#include "HAL/UnrealMemory.h"
39
54{
55 static constexpr uint16 TABLE_SIZE = 256;
56 static constexpr uint16 ALPHABET_SIZE = 256;
57
59 {
60 Reset();
61 }
62
64 {
65 return MinimumWordLength;
66 }
67
68 void AddPredictionWord(const uint8* Data, const uint16 DataLen)
69 {
70 uint8 Bytes[8] = {};
71
73 switch (DataLen)
74 {
75 default:
76 Bytes[7] = Data[7];
77 case 7:
78 Bytes[6] = Data[6];
79 case 6:
80 Bytes[5] = Data[5];
81 case 5:
82 Bytes[4] = Data[4];
83 case 4:
84 Bytes[3] = Data[3];
85 case 3:
86 Bytes[2] = Data[2];
87 case 2:
88 Bytes[1] = Data[1];
89 case 1:
90 Bytes[0] = Data[0];
91 }
92
93 const uint16 Hash1 = HashFn(Bytes[0], Bytes[1]);
94 const uint16 Hash2 = HashFn(Hash1, Bytes[2]);
95 const uint16 Hash3 = HashFn(Hash2, Bytes[3]);
96 const uint16 Hash4 = HashFn(Hash3, Bytes[4]);
97 const uint16 Hash5 = HashFn(Hash4, Bytes[5]);
98 const uint16 Hash6 = HashFn(Hash5, Bytes[6]);
99 const uint16 Hash7 = HashFn(Hash6, Bytes[7]);
100
101 // For each character, store a pair of bits indicating a match (even value, b10)
102 // or an accept (00, even value < 2) into the table at increasing offsets into out 16-bit matching window, with each
103 // character stored into a different bucket (due to hashing). The last character for our added word is always
104 // 00 by definition.
105 //
106 // e.g Adding 5-letter prediction word "apple"
107 // character pos
108 // 0 1 2 3 4 5 6 7
109 // bucket['a'] 10 bb bb bb bb bb bb bb
110 // bucket[h('p')] bb 10 bb bb bb bb bb bb
111 // bucket[h('p')] bb bb 10 bb bb bb bb bb
112 // bucket[h('l')] bb bb bb 10 bb bb bb bb
113 // bucket[h('e')] bb bb bb bb 00 bb bb bb <-- last character is match and accept
114
115 PredictMatchTable[Bytes[0]] &= (0x3FFF | ((DataLen > 1) << 15));
116 PredictMatchTable[Hash1] &= (DataLen > 1 ? (0xCFFF | ((DataLen > 2) << 13)) : 0xFFFF);
117 PredictMatchTable[Hash2] &= (DataLen > 2 ? (0xF3FF | ((DataLen > 3) << 11)) : 0xFFFF);
118 PredictMatchTable[Hash3] &= (DataLen > 3 ? (0xFCFF | ((DataLen > 4) << 9)) : 0xFFFF);
119 PredictMatchTable[Hash4] &= (DataLen > 4 ? (0xFF3F | ((DataLen > 5) << 7)) : 0xFFFF);
120 PredictMatchTable[Hash5] &= (DataLen > 5 ? (0xFFCF | ((DataLen > 6) << 5)) : 0xFFFF);
121 PredictMatchTable[Hash6] &= (DataLen > 6 ? (0xFFF3 | ((DataLen > 7) << 3)) : 0xFFFF);
122 PredictMatchTable[Hash7] &= (DataLen > 7 ? (0xFFFC) : 0xFFFF);
123
124 MinimumWordLength = MinimumWordLength < DataLen ? MinimumWordLength : DataLen;
125 for (int i = 0; i < MinimumWordLength; ++i)
126 {
127 const uint8 Byte = Data[i];
128 BitApproxTable[Byte] &= ~(1 << i);
129 }
130 }
131
132 bool MatchApproximate(const uint8* Data, const uint32 DataLen) const
133 {
134 check(DataLen);
135
136 // Note this mask is not what is normally expected.
137 // This mask is used to check if our sliding window of bits contains
138 // a 0 in bit position MinimumWordLength which would indicate a potential match
139 const uint16 BitApproxMask = (uint16) (1 << (MinimumWordLength - 1));
140 const uint8* const DataEnd = Data + DataLen;
141
142 // Start with no matching bits (all 1)
143 uint16 Bits = ~0;
144 for (;;)
145 {
146 // Continue scanning until we either run out of bytes or we find an approximate bit match
147 // Shift left and OR our sliding window of potential matches. BitApproxTable[*Data] OR'd
148 // with Bits will either keep sliding a 0 bit left indicating we have a fuzzy match, or as we
149 // OR values from BitApproxTable[*Data], we will stomp over the sliding 0 with a 1 indicating
150 // no match and the matching process starts over by sliding in a 0 with the next left shift
151 while ((Data < DataEnd) && (((Bits = (uint16)(Bits << 1) | BitApproxTable[*Data]) & BitApproxMask) != 0))
152 {
153 Data++;
154 }
155
156 if (Data >= DataEnd)
157 {
158 return false;
159 }
160
161 // The Bitap scanning above has indicated we have a potential match, but now defer
162 // to PredictMatch to further refine our prediction, since Bitap operates window sizes of
163 // the smallest substring which might be quite small compared to the substrings we are searching for.
164 const uint8* PredictionStart = Data - MinimumWordLength + 1;
165 if (PredictMatch(PredictMatchTable, PredictionStart, (uint32)(DataEnd - PredictionStart)))
166 {
167 return true;
168 }
169 ++Data;
170 }
171 }
172
173 void Reset()
174 {
175 // Can be no greater than the number of bits in a BitApproxTable element. This value will shrink if we are given a smaller substring
176 MinimumWordLength = sizeof(BitApproxTable[0]) * 8;
177
178 FMemory::Memset(BitApproxTable, 0xFF, sizeof(BitApproxTable));
179 FMemory::Memset(PredictMatchTable, 0xFF, sizeof(PredictMatchTable));
180 }
181
182private:
183 static uint16 HashFn(const uint16 A, const uint8 B)
184 {
185 return (uint16)((((uint16)(A) << 3) ^ (uint16)(B)) & (TABLE_SIZE - 1));
186 }
187
188 static bool PredictMatch(const uint16* RESTRICT PredictMatchTable, const uint8* RESTRICT Data, const uint32 DataLen)
189 {
190 uint8 Bytes[8] = {};
191
192 check(DataLen);
193 switch (DataLen)
194 {
195 default:
196 Bytes[7] = Data[7];
197 case 7:
198 Bytes[6] = Data[6];
199 case 6:
200 Bytes[5] = Data[5];
201 case 5:
202 Bytes[4] = Data[4];
203 case 4:
204 Bytes[3] = Data[3];
205 case 3:
206 Bytes[2] = Data[2];
207 case 2:
208 Bytes[1] = Data[1];
209 case 1:
210 Bytes[0] = Data[0];
211 }
212
213 /*
214 * Branchless implementation of the following logic:
215 *
216 * if Accept(Bytes[0], 0) then return TRUE // If we have a substring that ends with Bytes[0] at position 0
217 * if Match(Bytes[0], 0) then // Otherwise, If we have a substring that has Bytes[0] at position 0
218 * if Accept(Bytes[1], 1) then return TRUE
219 * if Match(Bytes[1], 1) then
220 * ...
221 * if Accept(Bytes[6], 6) then return TRUE
222 * if Match(Bytes[6], 6) then
223 * // It's the last character in the window so no need to check Accept(Bytes[7], 7)
224 * if Matchbit(Bytes[7], 7) then return TRUE
225 */
226 const uint16 Hash1 = HashFn(Bytes[0], Bytes[1]);
227 const uint16 Hash2 = HashFn(Hash1, Bytes[2]);
228 const uint16 Hash3 = HashFn(Hash2, Bytes[3]);
229 const uint16 Hash4 = HashFn(Hash3, Bytes[4]);
230 const uint16 Hash5 = HashFn(Hash4, Bytes[5]);
231 const uint16 Hash6 = HashFn(Hash5, Bytes[6]);
232 const uint16 Hash7 = HashFn(Hash6, Bytes[7]);
233
234 const uint16 AcceptBit0 = PredictMatchTable[Bytes[0]];
235 const uint16 AcceptBit1 = PredictMatchTable[Hash1];
236 const uint16 AcceptBit2 = PredictMatchTable[Hash2];
237 const uint16 AcceptBit3 = PredictMatchTable[Hash3];
238 const uint16 AcceptBit4 = PredictMatchTable[Hash4];
239 const uint16 AcceptBit5 = PredictMatchTable[Hash5];
240 const uint16 AcceptBit6 = PredictMatchTable[Hash6];
241 const uint16 AcceptBit7 = PredictMatchTable[Hash7];
242
243 const uint16 Bits =
244 (AcceptBit0 & 0xC000) | (AcceptBit1 & 0x3000) | (AcceptBit2 & 0x0C00) | (AcceptBit3 & 0x0300) |
245 (AcceptBit4 & 0x00C0) | (AcceptBit5 & 0x0030) | (AcceptBit6 & 0x000C) | (AcceptBit7 & 0x0003);
246 const uint16 MatchBits = ((((((((((((((Bits >> 2) | Bits) >> 2) | Bits) >> 2) | Bits) >> 2) | Bits) >> 2) | Bits) >> 2) | Bits) >> 1) | Bits);
247
248 // all two-bit pairs are set meaning no matches (no even valued pairs)
249 return MatchBits != 0xFFFF;
250 }
251
252 uint16 MinimumWordLength;
253
254 // Table describing for character 'x', BitApproxTable[x] returns a value (in this case 16 bits)
255 // where a 0 in the nth bit implies you may have a match if 'n' consecutive possible matches have
256 // been seen when scanning a string of characters.
257 uint16 BitApproxTable[ALPHABET_SIZE];
258
259 // Table encoding a 'match'and 'accept' value using two bits for each character in a window size of 8
260 // PredictMatch relies on two pieces of information:
261 // Match(x,n) == 1, meaning any of our prediction words has x as their nth character
262 // Accept(x, n) == 1, meaning any of our prediction words ends at n with x
263 // The PredictionMatchTable[x] returns 16-bits for an 8-character window of characters where
264 // for each two bit pair, we provide the answer to Match(x, n) == 1 (true if the bit pair is even),
265 // and Accept(x, n) == 1 (true if the bit pair is < 2).
266 // Since hashing is used to distribute match and accept data for prediction words, the values stored
267 // at PredictionMatchTable[x] might not represent the window for character 'x' across all prediction words
268 // so that predictions can ideally produce fewer false positives.
269 // For more information it's highly recommended reading the reference link above
270 uint16 PredictMatchTable[TABLE_SIZE];
271};
#define check(expr)
Definition AssertionMacros.h:314
#define RESTRICT
Definition Platform.h:706
UE_FORCEINLINE_HINT TSharedRef< CastToType, Mode > StaticCastSharedRef(TSharedRef< CastFromType, Mode > const &InSharedRef)
Definition SharedPointer.h:127
uint8_t uint8
Definition binka_ue_file_header.h:8
uint16_t uint16
Definition binka_ue_file_header.h:7
uint32_t uint32
Definition binka_ue_file_header.h:6
GeometryCollection::Facades::FMuscleActivationData Data
Definition MuscleActivationConstraints.h:15
@ Bits
Definition PacketView.h:34
constexpr int32 TABLE_SIZE
Definition ACESUtils.cpp:45
static UE_FORCEINLINE_HINT void * Memset(void *Dest, uint8 Char, SIZE_T Count)
Definition UnrealMemory.h:119
Definition PM-k.h:54
uint16 GetMinimumWordLength() const
Definition PM-k.h:63
FPredictMatch8()
Definition PM-k.h:58
void Reset()
Definition PM-k.h:173
void AddPredictionWord(const uint8 *Data, const uint16 DataLen)
Definition PM-k.h:68
bool MatchApproximate(const uint8 *Data, const uint32 DataLen) const
Definition PM-k.h:132