UDocumentation UE5.7 10.02.2026 (Source)
API documentation for Unreal Engine 5.7
UTF8StringView.h
Go to the documentation of this file.
1// Copyright Epic Games, Inc. All Rights Reserved.
2
3#pragma once
4
9#include <cstring>
10
11namespace uLang
12{
13
15{
16public:
17 // Public Data
18
/// Points to the byte after the last byte.
20 const UTF8Char* _End;
21
22 // Construction
23
24 CUTF8StringView() : _Begin(nullptr), _End(nullptr) {}
25 CUTF8StringView(ENoInit) {} // Do nothing - use with care!
29
30 void Reset() { _Begin = _End = nullptr; }
31 void Set(const UTF8Char* Begin, const UTF8Char* End) { _Begin = Begin; _End = End; }
32
33 // Accessors
34 ULANG_FORCEINLINE const UTF8Char* Data() const { return _Begin; }
36 ULANG_FORCEINLINE bool IsEmpty() const { return _Begin >= _End; }
37 ULANG_FORCEINLINE bool IsFilled() const { return _Begin < _End; }
38
41 {
42 ULANG_ASSERTF(ByteIndex >= 0 && _Begin + ByteIndex < _End, "Invalid index: ByteIndex=%i ByteLen()=%i", ByteIndex, ByteLen());
43 return _Begin[ByteIndex];
44 }
45
48 {
49 return (_End > _Begin) ? *_Begin : 0u;
50 }
51
54 {
55 return (_End > (_Begin + 1)) ? *(_Begin + 1) : 0u;
56 }
57
60 {
61 return (_End > _Begin) ? *(_End - 1) : 0u;
62 }
63
66 {
67 return (_End < Enclosing._End) ? *_End : 0u;
68 }
69
72 {
73 return ((_End + 1) < Enclosing._End) ? *(_End + 1) : 0u;
74 }
75
81
83 {
84 for (const UTF8Char *ThisChar = _Begin, *TextChar = Text._Begin; TextChar < Text._End; ++ThisChar, ++TextChar)
85 {
86 if (ThisChar >= _End || *ThisChar != *TextChar) return false;
87 }
88 return true;
89 }
90
92 {
93 if (Text.ByteLen() > ByteLen())
94 {
95 return false;
96 }
97
98 for (const UTF8Char* ThisChar = _End - Text.ByteLen(), *TextChar = Text._Begin; ThisChar < _End; ++ThisChar, ++TextChar)
99 {
100 if (*ThisChar != *TextChar) return false;
101 }
102 return true;
103 }
104
106 {
107 for (const UTF8Char* ThisChar = _Begin; ThisChar < _End; ++ThisChar)
108 {
109 if (*ThisChar == Char)
110 {
111 return int32_t(ThisChar - _Begin);
112 }
113 }
114 return IndexNone;
115 }
116
118 {
119 if (_Begin == _End)
120 {
121 return IndexNone;
122 }
123 const UTF8Char* EndChar = _End - Text.ByteLen(); // No need to check further as Text wouldn't fit
124 for (const UTF8Char* ThisChar = _Begin; ThisChar <= EndChar; ++ThisChar)
125 {
126 for (const UTF8Char *SubChar = ThisChar, *TextChar = Text._Begin; TextChar < Text._End; ++TextChar, ++SubChar)
127 {
128 if (*SubChar != *TextChar)
129 {
130 goto Continue;
131 }
132 }
133 return int32_t(ThisChar - _Begin);
134
135 Continue:;
136 }
137
138 return IndexNone;
139 }
140
142 {
143 return Find(Char) != IndexNone;
144 }
145
147 {
148 return Find(Text) != IndexNone;
149 }
150
152 {
153 const UTF8Char* EndChar = _End - Text.ByteLen(); // No need to check further as Text wouldn't fit
154 for (const UTF8Char* ThisChar = _Begin; ThisChar <= EndChar; ++ThisChar)
155 {
156 for (const UTF8Char* SubChar = ThisChar, *TextChar = Text._Begin; TextChar < Text._End; ++TextChar, ++SubChar)
157 {
159 {
160 goto Continue;
161 }
162 }
163 return true;
164
165 Continue:;
166 }
167
168 return false;
169 }
170
171 // Comparisons
172
174 {
175 if (_End - _Begin != Other._End - Other._Begin) return false;
176 for (const UTF8Char *ThisChar = _Begin, *OtherChar = Other._Begin; ThisChar < _End; ++ThisChar, ++OtherChar)
177 {
178 if (*ThisChar != *OtherChar) return false;
179 }
180 return true;
181 }
182
184 {
185 return !(*this == Other);
186 }
187
188 bool operator<(const CUTF8StringView& Other) const
189 {
190 for (const UTF8Char *ThisChar = _Begin, *OtherChar = Other._Begin; ThisChar < _End && OtherChar < Other._End; ++ThisChar, ++OtherChar)
191 {
192 if (*ThisChar < *OtherChar) return true;
193 if (*ThisChar > *OtherChar) return false;
194 }
195 return (_End - _Begin < Other._End - Other._Begin);
196 }
197
198 bool operator>(const CUTF8StringView& Other) const
199 {
200 for (const UTF8Char *ThisChar = _Begin, *OtherChar = Other._Begin; ThisChar < _End && OtherChar < Other._End; ++ThisChar, ++OtherChar)
201 {
202 if (*ThisChar < *OtherChar) return false;
203 if (*ThisChar > *OtherChar) return true;
204 }
205 return (_End - _Begin > Other._End - Other._Begin);
206 }
207
209 {
210 return !(*this > Other);
211 }
212
214 {
215 return !(*this < Other);
216 }
217
218 bool operator==(const char* NullTerminatedString) const
219 {
221 for (const UTF8Char *ThisChar = _Begin; ThisChar < _End; ++ThisChar, ++OtherChar)
222 {
223 if (*OtherChar == 0 || *ThisChar != *OtherChar) return false;
224 }
225 return *OtherChar == 0;
226 }
227
228 bool operator!=(const char* NullterminatedString) const
229 {
230 return !(*this == NullterminatedString);
231 }
232
234 {
235 if (_End - _Begin != Other._End - Other._Begin) return false;
236 for (const UTF8Char* ThisChar = _Begin, *OtherChar = Other._Begin; ThisChar < _End; ++ThisChar, ++OtherChar)
237 {
239 }
240 return true;
241 }
242
243 // Mutators
244
247 {
248 ULANG_ASSERTF(_End > _Begin, "Can't pop front from empty string!");
249 return *_Begin++;
250 }
251
259
260 // Extract sub views
261
264 {
265 return { _Begin, CMath::Clamp(_Begin + ByteCount, _Begin, _End) };
266 }
267
270 {
271 return { CMath::Clamp(_End - ByteCount, _Begin, _End), _End };
272 }
273
276 {
277 return { CMath::Clamp(_Begin + ByteIndex, _Begin, _End), _End };
278 }
279
282 {
283 return { _Begin, CMath::Clamp(_End - ByteCount, _Begin, _End) };
284 }
285
288 {
289 ULANG_ASSERTF(ByteCount >= 0, "ByteCount must be non-negative.");
290 const UTF8Char* MidBegin = CMath::Clamp(_Begin + ByteIndex, _Begin, _End);
291 const UTF8Char* MidEnd = MidBegin + CMath::Min(ByteCount, int32_t(_End - MidBegin));
292 return { MidBegin, MidEnd };
293 }
294
297 {
298 ULANG_ASSERTF(Range._Begin <= uint32_t(ByteLen()) && Range._End <= uint32_t(ByteLen()), "Range must be contained in string view.");
299 return { _Begin + Range._Begin, _Begin + Range._End };
300 }
301
304 {
305 const UTF8Char* NewBegin = _Begin + ByteIndex;
306 ULANG_ASSERTF(ByteIndex >= 0 && NewBegin <= _End, "Index of of Bounds.");
307 return { NewBegin, NewBegin };
308 }
309
312 {
313 ULANG_ASSERTF(SubView._Begin >= _Begin && SubView._End <= _End, "Index of of Bounds.");
315 }
316
317 // Unicode iteration
318
320 {
321 public:
323 : _CurrentByte(CurrentByte)
324 , _CurrentByteLen(CurrentByteLen)
325 {
326 Eval();
327 }
328
330 {
332 ++*this;
333 return temp;
334 }
335
337 {
338 _CurrentByte += _CurrentValue._ByteLengthUTF8;
339 _CurrentByteLen -= _CurrentValue._ByteLengthUTF8;
340 Eval();
341 return *this;
342 }
343
344 ULANG_FORCEINLINE UniCodePoint operator* () const { return _CurrentValue._CodePoint; }
345
346 ULANG_FORCEINLINE bool operator==(const UnicodeConstIterator& Other) const { return _CurrentByte == Other._CurrentByte; }
347 ULANG_FORCEINLINE bool operator!=(const UnicodeConstIterator& Other) const { return _CurrentByte != Other._CurrentByte; }
348 ULANG_FORCEINLINE const UTF8Char* CurrentByte() const { return this->_CurrentByte; }
349
350 private:
351
352 ULANG_FORCEINLINE void Eval()
353 {
354 _CurrentValue = _CurrentByteLen > 0 ? CUnicode::DecodeUTF8(_CurrentByte, _CurrentByteLen) : SUniCodePointLength{};
355 }
356
// Code point and its UTF-8 byte length for the current position; refreshed by Eval().
357 SUniCodePointLength _CurrentValue;
// Byte the iterator is currently positioned at; advanced by the increment operator.
358 const UTF8Char* _CurrentByte;
// Byte count handed to the decoder in Eval(); reduced by the increment operator as the iterator advances.
359 size_t _CurrentByteLen;
360 };
361
364
365protected:
366
367 /*
368 * Convert index that may be relative (negative) to the length of the string to direct index and assert if out of bounds.
369 * -1=last char, -2=second to last char, etc.
370 * @return: Converted direct index
371 */
373 {
374 if (InIdx < 0)
375 {
377 ULANG_ASSERTF((ByteLen + InIdx) >= 0, "Index `%i` from end of string is out of bounds and resolved to `%i` bytes before the start of the string!", InIdx, -(ByteLen + InIdx));
378 return ByteLen + InIdx;
379 }
380
381 ULANG_ASSERTF(InIdx < int32_t(_End - _Begin), "Index `%i` is out of bounds in `%i` byte string!", InIdx, int32_t(_End - _Begin));
382 return InIdx;
383 }
384
385 /*
386 * Convert index and span count that may be relative (negative) to the length of the string to direct index and span.
387 * Assert if the index is out of bounds.
388 * @param: InOutIdx - index to convert. If negative it is relative to the end of the string -1=last char, -2=second to last char, etc.
389 * @param: InOutSpan - number of characters in span. If negative it indicates remainder of string after `InOutIdx` - so -1 = include last, -2 = include char before last, etc.
390 * @return: true if there is a valid span of characters and false if the span or string is empty
391 */
393 {
395
396 if ((ByteLen <= 0) || (InOutSpan == 0))
397 {
398 return false;
399 }
400
401 if (InOutIdx < 0)
402 {
403 ULANG_ASSERTF((ByteLen + InOutIdx) >= 0, "Index `%i` from end of string is out of bounds and resolved to `%i` bytes before the start of the string!", InOutIdx, -(ByteLen + InOutIdx));
404 InOutIdx += ByteLen;
405 }
406
407 // Permissively allow InOutIdx=ByteLen
408 ULANG_ASSERTF(InOutIdx <= ByteLen, "Index `%i` is out of bounds in `%i` byte string!", InOutIdx, ByteLen);
409
411
412 if (InOutSpan < 0)
413 {
414 InOutSpan += CountMax + 1;
415 }
416
417 // Be forgiving with the count
419
420 return InOutSpan != 0;
421 }
422
423 template<class OtherAllocatorType, typename... OtherAllocatorArgsType>
424 friend class TUTF8String;
425
426 template<class OtherAllocatorType, typename... OtherAllocatorArgsType>
427 friend class TUTF8StringBuilder;
428};
429
432
433}
UE_FORCEINLINE_HINT TSharedRef< CastToType, Mode > StaticCastSharedRef(TSharedRef< CastFromType, Mode > const &InSharedRef)
Definition SharedPointer.h:127
@ Char
Character type.
#define ULANG_FORCEINLINE
Definition Common.h:188
#define ULANG_ASSERTF(expr, format,...)
Definition Common.h:290
static ULANG_FORCEINLINE uint32_t Generate(const uint8_t *Begin, const uint8_t *End, uint32_t PrevCRC=0)
Definition CRC.h:46
static ULANG_FORCEINLINE constexpr T Clamp(const T X, const T Min, const T Max)
Clamps X to be between Min and Max, inclusive.
Definition MathUtils.h:33
static ULANG_FORCEINLINE constexpr T Min(const T A, const T B)
Returns lower value in a generic way.
Definition MathUtils.h:26
Definition UTF8StringView.h:320
ULANG_FORCEINLINE UnicodeConstIterator & operator++()
Definition UTF8StringView.h:336
ULANG_FORCEINLINE UnicodeConstIterator(const UTF8Char *CurrentByte, size_t CurrentByteLen)
Definition UTF8StringView.h:322
ULANG_FORCEINLINE const UTF8Char * CurrentByte() const
Definition UTF8StringView.h:348
ULANG_FORCEINLINE bool operator==(const UnicodeConstIterator &Other) const
Definition UTF8StringView.h:346
ULANG_FORCEINLINE UniCodePoint operator*() const
Definition UTF8StringView.h:344
ULANG_FORCEINLINE UnicodeConstIterator operator++(int)
Definition UTF8StringView.h:329
ULANG_FORCEINLINE bool operator!=(const UnicodeConstIterator &Other) const
Definition UTF8StringView.h:347
Definition UTF8StringView.h:15
ULANG_FORCEINLINE CUTF8StringView SubView(int32_t ByteIndex, int32_t ByteCount=INT32_MAX) const
Definition UTF8StringView.h:287
ULANG_FORCEINLINE bool EndsWith(const CUTF8StringView &Text) const
Definition UTF8StringView.h:91
bool operator<(const CUTF8StringView &Other) const
Definition UTF8StringView.h:188
ULANG_FORCEINLINE UTF8Char SecondByte() const
Definition UTF8StringView.h:53
ULANG_FORCEINLINE SUniCodePointLength PopFirstCodePoint()
Definition UTF8StringView.h:253
void Reset()
Definition UTF8StringView.h:30
ULANG_FORCEINLINE UTF8Char NextNextByte(const CUTF8StringView &Enclosing) const
Definition UTF8StringView.h:71
ULANG_FORCEINLINE bool ContainsCaseIndependent(const CUTF8StringView &Text) const
Definition UTF8StringView.h:151
bool operator>(const CUTF8StringView &Other) const
Definition UTF8StringView.h:198
CUTF8StringView(ENoInit)
Definition UTF8StringView.h:25
const UTF8Char * _Begin
Points to first byte.
Definition UTF8StringView.h:19
CUTF8StringView()
Definition UTF8StringView.h:24
bool operator==(const char *NullTerminatedString) const
Definition UTF8StringView.h:218
ULANG_FORCEINLINE CUTF8StringView SubViewTrimBegin(int32_t ByteIndex) const
Definition UTF8StringView.h:275
ULANG_FORCEINLINE UTF8Char FirstByte() const
Definition UTF8StringView.h:47
ULANG_FORCEINLINE int32_t InputByteIdxToDirectIdx(int32_t InIdx) const
Definition UTF8StringView.h:372
ULANG_FORCEINLINE const UTF8Char * Data() const
Definition UTF8StringView.h:34
ULANG_FORCEINLINE CUTF8StringView SubViewEmpty(int32_t ByteIndex) const
Definition UTF8StringView.h:303
ULANG_FORCEINLINE CUTF8StringView SubViewEnd(int32_t ByteCount) const
Definition UTF8StringView.h:269
ULANG_FORCEINLINE int32_t Find(UTF8Char Char) const
Definition UTF8StringView.h:105
bool operator>=(const CUTF8StringView &Other) const
Definition UTF8StringView.h:213
bool IsEqualCaseIndependent(const CUTF8StringView &Other) const
Definition UTF8StringView.h:233
bool operator==(const CUTF8StringView &Other) const
Definition UTF8StringView.h:173
ULANG_FORCEINLINE bool StartsWith(const CUTF8StringView &Text) const
Definition UTF8StringView.h:82
ULANG_FORCEINLINE bool InputByteIdxSpan(int32_t &InOutIdx, int32_t &InOutSpan) const
Definition UTF8StringView.h:392
ULANG_FORCEINLINE UTF8Char PopFirstByte()
Definition UTF8StringView.h:246
ULANG_FORCEINLINE SUniCodePointLength FirstCodePoint() const
Definition UTF8StringView.h:77
ULANG_FORCEINLINE UTF8Char LastByte() const
Definition UTF8StringView.h:59
CUTF8StringView(const char *String, size_t ByteLen)
Definition UTF8StringView.h:28
CUTF8StringView(const UTF8Char *Begin, const UTF8Char *End)
Definition UTF8StringView.h:26
ULANG_FORCEINLINE bool Contains(const CUTF8StringView &Text) const
Definition UTF8StringView.h:146
ULANG_FORCEINLINE CUTF8StringView SubViewTrimEnd(int32_t ByteCount) const
Definition UTF8StringView.h:281
bool operator<=(const CUTF8StringView &Other) const
Definition UTF8StringView.h:208
const UTF8Char * _End
Points to the byte after the last byte.
Definition UTF8StringView.h:20
CUTF8StringView(const char *NullterminatedString)
Definition UTF8StringView.h:27
ULANG_FORCEINLINE bool IsFilled() const
Definition UTF8StringView.h:37
bool operator!=(const char *NullterminatedString) const
Definition UTF8StringView.h:228
ULANG_FORCEINLINE int32_t Find(const CUTF8StringView &Text) const
Definition UTF8StringView.h:117
bool operator!=(const CUTF8StringView &Other) const
Definition UTF8StringView.h:183
ULANG_FORCEINLINE UnicodeConstIterator begin() const
Definition UTF8StringView.h:362
SIdxRange SubRange(const CUTF8StringView &SubView) const
Definition UTF8StringView.h:311
ULANG_FORCEINLINE bool IsEmpty() const
Definition UTF8StringView.h:36
ULANG_FORCEINLINE const UTF8Char & operator[](int32_t ByteIndex) const
Definition UTF8StringView.h:40
ULANG_FORCEINLINE UTF8Char NextByte(const CUTF8StringView &Enclosing) const
Definition UTF8StringView.h:65
ULANG_FORCEINLINE UnicodeConstIterator end() const
Definition UTF8StringView.h:363
void Set(const UTF8Char *Begin, const UTF8Char *End)
Definition UTF8StringView.h:31
ULANG_FORCEINLINE CUTF8StringView SubView(const SIdxRange &Range) const
Definition UTF8StringView.h:296
ULANG_FORCEINLINE int32_t ByteLen() const
Definition UTF8StringView.h:35
ULANG_FORCEINLINE CUTF8StringView SubViewBegin(int32_t ByteCount) const
Definition UTF8StringView.h:263
ULANG_FORCEINLINE bool Contains(UTF8Char Char) const
Definition UTF8StringView.h:141
static ULANG_FORCEINLINE UTF8Char ToLower_ASCII(const UTF8Char Ch)
Definition Unicode.h:53
static SUniCodePointLength DecodeUTF8(const UTF8Char *Text, size_t TextByteLength)
Definition Unicode.h:79
Definition UTF8StringBuilder.h:24
Definition UTF8String.h:21
Definition VVMEngineEnvironment.h:23
@ IndexNone
Definition Common.h:381
ENoInit
Enum used in constructors to indicate they should not initialize anything.
Definition Common.h:375
uint8_t UTF8Char
UTF-8 octet.
Definition Unicode.h:20
uint32_t UniCodePoint
UTF-32 character / code point.
Definition Unicode.h:23
ULANG_FORCEINLINE uint32_t GetTypeHash(const TArray< T > Array)
Definition Array.h:2132
Definition IdxRange.h:16
Pair of code point and its length in bytes in UTF-8.
Definition Unicode.h:27
UniCodePoint _CodePoint
Definition Unicode.h:28
uint32_t _ByteLengthUTF8
Definition Unicode.h:29