UDocumentation UE5.7 10.02.2026 (Source)
API documentation for Unreal Engine 5.7
UTF8String.h
Go to the documentation of this file.
1// Copyright Epic Games, Inc. All Rights Reserved.
2
3#pragma once
4
7#include <stdarg.h> // va_start, va_copy, va_end
8#include <stdio.h> // snprintf
9#include <cstring> // strlen
10
11namespace uLang
12{
13
14template<class AllocatorType, typename... AllocatorArgsType> class TUTF8StringBuilder;
15
19template<class AllocatorType, typename... AllocatorArgsType>
20class TUTF8String : AllocatorType
21{
22public:
23
24 // Construction
25
// Default constructor: builds the allocator base with DefaultInit.
// _String is not named in the initializer list, so it gets whatever CUTF8StringView's default constructor does
// (presumably an empty view — confirm in UTF8StringView.h).
26 TUTF8String() : AllocatorType(DefaultInit) {}
29 template<typename... FormatterArgsType>
32 template<typename InitializerFunctorType> // Functor takes UTF8Char* pointer to uninitialized memory
35 template<class OtherAllocatorType, typename... OtherAllocatorArgsType>
38
// Destructor: calls Release() only when a buffer is held (_String._Begin is non-null).
39 ~TUTF8String() { if (_String._Begin) { Release(); } }
// Drops the current contents: calls Release() and then _String.Reset().
// Does nothing at all when no buffer is held (_String._Begin is null).
40 ULANG_FORCEINLINE void Reset() { if (_String._Begin) { Release(); _String.Reset(); } }
43
44 // Accessors
45
// Length/emptiness queries; each one simply forwards to the same-named method of the underlying view.
46 ULANG_FORCEINLINE int32_t ByteLen() const { return _String.ByteLen(); } // Length in bytes as reported by the view
47 ULANG_FORCEINLINE bool IsEmpty() const { return _String.IsEmpty(); }
48 ULANG_FORCEINLINE bool IsFilled() const { return _String.IsFilled(); }
// Raw pointer accessors. All three return the start of the stored bytes, or a pointer to an empty
// string literal when no buffer is held, so the result is never null.
49 ULANG_FORCEINLINE const UTF8Char* AsUTF8() const { return _String._Begin ? _String._Begin : (const UTF8Char*)""; } // Typed as UTF-8 octets
50 ULANG_FORCEINLINE const char* AsCString() const { return _String._Begin ? (char*)_String._Begin : ""; } // Same bytes, typed as const char*
51 ULANG_FORCEINLINE const char* operator*() const { return _String._Begin ? (char*)_String._Begin : ""; } // Shorthand with the same result as AsCString()
52
55 {
56 ULANG_ASSERTF(ByteIndex >= 0 && _String._Begin + ByteIndex < _String._End, "Invalid index: Index=%i, ByteLen()=%i", ByteIndex, ByteLen());
57 return _String._Begin[ByteIndex];
58 }
59
60 // Comparison operators
61
62 template<class OtherAllocatorType, typename... OtherAllocatorArgsType>
64 template<class OtherAllocatorType, typename... OtherAllocatorArgsType>
66 template<class OtherAllocatorType, typename... OtherAllocatorArgsType>
68 template<class OtherAllocatorType, typename... OtherAllocatorArgsType>
70 template<class OtherAllocatorType, typename... OtherAllocatorArgsType>
72 template<class OtherAllocatorType, typename... OtherAllocatorArgsType>
// Equality / inequality against a string view; both delegate to the corresponding CUTF8StringView operator on _String.
74 ULANG_FORCEINLINE bool operator==(const CUTF8StringView& StringView) const { return _String == StringView; }
75 ULANG_FORCEINLINE bool operator!=(const CUTF8StringView& StringView) const { return _String != StringView; }
76
77 // Assignment
78
81
82 // Append
84 template<class OtherAllocatorType, typename... OtherAllocatorArgsType>
87
89 template<class OtherAllocatorType, typename... OtherAllocatorArgsType>
92
93 // Conversions
94
// View conversions. Both return a reference to the internal _String member (no copy is made),
// so the returned reference aliases this object and must not outlive it.
95 ULANG_FORCEINLINE operator const CUTF8StringView&() const { return _String; } // Implicit conversion
96 ULANG_FORCEINLINE const CUTF8StringView& ToStringView() const { return _String; } // Explicit, named equivalent
97
98 // Unicode iteration
99
102
103 // Misc
104
106 {
107 return TUTF8String(ByteLen(), [this, Old, New](UTF8Char* Memory)
108 {
109 for (const UTF8Char* OldCh = _String._Begin; OldCh < _String._End; ++OldCh)
110 {
111 *Memory++ = (*OldCh == Old) ? New : *OldCh;
112 }
113 });
114 }
115
117 {
118 // Anything to do?
119 if (IsEmpty() || Old.IsEmpty())
120 {
121 return *this;
122 }
123
124 TUTF8String Result(NoInit, GetAllocator());
125 Result.AllocateUninitialized(ByteLen() * 2);
126 UTF8Char* DstChar = const_cast<UTF8Char*>(Result._String._Begin);
127
128 // Find matches
129 const UTF8Char* BeginChar = _String._Begin;
130 const UTF8Char* EndChar = _String._End - Old.ByteLen(); // No need to check further as Text wouldn't fit
131 for (const UTF8Char* ThisChar = BeginChar; ThisChar <= EndChar; ++ThisChar)
132 {
133 const UTF8Char* SubChar = ThisChar;
134 for (const UTF8Char* OldChar = Old._Begin; OldChar < Old._End; ++OldChar, ++SubChar)
135 {
136 if (*SubChar != *OldChar)
137 {
138 goto Continue;
139 }
140 }
141
142 // We found an occurrence! Copy partial result and continue...
143
144 // Will it fit?
145 {
146 // BytesNeeded includes the bytes for this partial result plus the remainder part
147 intptr_t BytesNeeded = (ThisChar - BeginChar + New.ByteLen()) + (_String._End - SubChar);
148 if (DstChar + BytesNeeded > Result._String._End)
149 {
150 // No, double the size of the result
151 intptr_t DstOffset = DstChar - Result._String._Begin;
152 Result._String._End = DstChar; // So we won't copy unnecessary bytes to the new memory
153 Result.Reallocate(Result.ByteLen() * 2 + BytesNeeded);
154 DstChar = const_cast<UTF8Char*>(Result._String._Begin + DstOffset);
155 }
156 }
157
158 // Copy partial result
161 if (New.ByteLen())
162 {
163 ::memcpy(DstChar, New._Begin, New.ByteLen());
164 }
165 DstChar += New.ByteLen();
166
167 // And continue
169 --ThisChar; // It's going to be incremented by the loop
170
171 Continue:;
172 }
173
174 // Copy remainder part after last replacement
175 // We have made sure in the code above that there is enough space for it
177 DstChar += (_String._End - BeginChar);
178
179 // Shrink result to perfect fit
180 Result.Reallocate(DstChar - Result._String._Begin);
181 return Result;
182 }
183
184 static const TUTF8String& GetEmpty();
185
186protected:
187
190
191private:
192
193 template<class OtherAllocatorType, typename... OtherAllocatorArgsType>
194 friend class TUTF8StringBuilder;
195
// Private tag constructors taking ENoInit.
// NOTE(review): the first overload has no initializer list, so the AllocatorType base and _String are
// default-constructed rather than passed NoInit — confirm that this matches the "do nothing" intent.
196 TUTF8String(ENoInit) {} // Do nothing - friends only!
// Copies the given allocator into the base and constructs _String with NoInit.
// Replace() follows this constructor with AllocateUninitialized(), which sets _Begin/_End.
197 TUTF8String(ENoInit, const AllocatorType& Allocator) : AllocatorType(Allocator), _String(NoInit) {}
198
// Allocates ByteLength + 1 bytes from the allocator and points _String at the first ByteLength of them.
// Nothing is written into the new memory here (not even the trailing byte); the caller must fill it.
// A buffer previously referenced by _String is NOT released by this function.
// Returns the start of the new buffer.
199 ULANG_FORCEINLINE UTF8Char* AllocateUninitialized(size_t ByteLength)
200 {
201 UTF8Char* Memory = (UTF8Char*)GetAllocator().Allocate(ByteLength + 1); // +1: room for the null terminator written by callers such as AllocateInitialized()
202 _String._Begin = Memory;
203 _String._End = Memory + ByteLength; // _End excludes the extra trailing byte
204
205 return Memory;
206 }
207
// Replaces the buffer with a newly allocated one holding ByteLength bytes plus a null terminator.
// If a buffer was held, min(ByteLength, old ByteLen()) bytes are copied across and the old buffer is released.
// When growing, the bytes between the copied prefix and the terminator are left unwritten.
// Returns the start of the new buffer.
208 ULANG_FORCEINLINE UTF8Char* Reallocate(size_t ByteLength)
209 {
210 size_t OldLength = ByteLen(); // Taken from the current _Begin/_End, so a caller that pulls _End in first (as Replace() does) limits the copy
211
212 UTF8Char* NewMemory = (UTF8Char*)GetAllocator().Allocate(ByteLength + 1);
213
214 if (_String._Begin)
215 {
216 memcpy(NewMemory, _String._Begin, (ByteLength < OldLength ? ByteLength : OldLength)); // Copy only what fits in the new buffer
217 Release(); // Old buffer is freed only after its contents have been copied
218 }
219
220 _String._Begin = NewMemory;
221 _String._End = NewMemory + ByteLength;
222
223 // re-null-terminate
224 NewMemory[ByteLength] = 0;
225
226 return NewMemory;
227 }
228
// Allocates a buffer via AllocateUninitialized(), copies ByteLength bytes from String into it and null-terminates it.
// Like AllocateUninitialized(), it does not release a buffer that _String may already reference.
230 ULANG_FORCEINLINE void AllocateInitialized(const UTF8Char* String, size_t ByteLength)
231 {
232 UTF8Char* Memory = AllocateUninitialized(ByteLength);
233
234 if (ByteLength) // Skip the copy for zero length; presumably because String may be null for an empty source
235 {
236 memcpy(Memory, String, ByteLength);
237 }
238 Memory[ByteLength] = 0; // Add null termination
239 }
240
243 {
244 GetAllocator().Deallocate((void*)_String._Begin);
245 }
246
// Accessors for the allocator. The class derives from AllocatorType, so the allocator is this object
// itself viewed as its base; both overloads return *this converted to the base type.
247 AllocatorType& GetAllocator()
248 {
249 return *this;
250 }
251
// Const overload of the accessor above.
252 const AllocatorType& GetAllocator() const
253 {
254 return *this;
255 }
256
258 CUTF8StringView _String;
259};
260
261template<class AllocatorType, typename... AllocatorArgsType>
267
270
273
275template<class AllocatorType, typename... AllocatorArgsType>
277
278//=======================================================================================
279// TUTF8String Inline Methods
280//=======================================================================================
281
282template<class AllocatorType, typename... AllocatorArgsType>
289
290template<class AllocatorType, typename... AllocatorArgsType>
292 : AllocatorType(uLang::ForwardArg<AllocatorArgsType>(AllocatorArgs)...)
293 , _String(NoInit)
294{
295 AllocateInitialized(StringView._Begin, StringView.ByteLen());
296}
297
298template<class AllocatorType, typename... AllocatorArgsType>
299template<typename... FormatterArgsType>
301 : AllocatorType(uLang::ForwardArg<AllocatorArgsType>(AllocatorArgs)...)
302 , _String(NoInit)
303{
304 // Compute length of string
305 size_t ByteLength = ::snprintf(nullptr, 0, NullTerminatedFormat, FormatterArgs...);
306
307 // Sanity check that everything went well
308 ULANG_ASSERTF(ByteLength != size_t(-1), "Invalid format string: %s", NullTerminatedFormat);
309
310 // Allocate memory
311 size_t AllocBytes = ByteLength + 1;
312 UTF8Char* Text = (UTF8Char*)GetAllocator().Allocate(AllocBytes);
313
314 // Create string
316
317 // Store string and allocator
318 _String = CUTF8StringView(Text, Text + ByteLength);
319}
320
321template<class AllocatorType, typename... AllocatorArgsType>
323 : AllocatorType(uLang::ForwardArg<AllocatorArgsType>(AllocatorArgs)...)
324 , _String(NoInit)
325{
327
328 // Compute length of string
330 size_t ByteLength = ::vsnprintf(nullptr, 0, NullTerminatedFormat, FormatterArgsLocal);
332
333 // Sanity check that everything went well
334 ULANG_ASSERTF(ByteLength != size_t(-1), "Invalid format string: %s", NullTerminatedFormat);
335
336 // Allocate memory
337 size_t AllocBytes = ByteLength + 1;
338 UTF8Char* Text = (UTF8Char*)GetAllocator().Allocate(AllocBytes);
339
340 // Create string
344
345 // Store string and allocator
346 _String = CUTF8StringView(Text, Text + ByteLength);
347}
348
349template<class AllocatorType, typename... AllocatorArgsType>
350template<typename InitializerFunctorType>
352 : AllocatorType(uLang::ForwardArg<AllocatorArgsType>(AllocatorArgs)...)
353 , _String(NoInit)
354{
355 ULANG_ASSERTF(ByteLength <= INT32_MAX, "TUTF8String doesn't support ByteLength > INT32_MAX. (ByteLength=%zu)", ByteLength);
356
357 UTF8Char* Memory = (UTF8Char*)GetAllocator().Allocate(ByteLength + 1);
358 _String._Begin = Memory;
359 _String._End = Memory + ByteLength;
361 Memory[ByteLength] = 0; // Add null termination
362}
363
364template<class AllocatorType, typename... AllocatorArgsType>
366 : AllocatorType(Other.GetAllocator())
367 , _String(NoInit)
368{
369 AllocateInitialized(Other._String._Begin, Other.ByteLen());
370}
371
372template<class AllocatorType, typename... AllocatorArgsType>
373template<class OtherAllocatorType, typename... OtherAllocatorArgsType>
380
381template<class AllocatorType, typename... AllocatorArgsType>
383 : AllocatorType(Other.GetAllocator())
384 , _String(Other._String)
385{
386 Other._String.Reset();
387}
388
// Copy assignment: replaces this string's contents with a copy of Other's.
// The current buffer is released while this object still holds its own allocator; only then is
// Other's allocator adopted and used to allocate the copy.
// Returns *this.
389template<class AllocatorType, typename... AllocatorArgsType>
391{
 if (this == &Other) { return *this; } // Self-assignment guard: without it, Release() below frees the very buffer AllocateInitialized() then copies from
392 if (_String._Begin) { Release(); }
393 GetAllocator() = Other.GetAllocator();
394 AllocateInitialized(Other._String._Begin, Other.ByteLen()); // Fresh null-terminated copy of Other's bytes
395 return *this;
396}
397
398template<class AllocatorType, typename... AllocatorArgsType>
400{
401 if (_String._Begin) { Release(); }
402 GetAllocator() = Other.GetAllocator();
403 _String = Other._String;
404 Other._String.Reset();
405 return *this;
406}
407
408template<class AllocatorType, typename... AllocatorArgsType>
410{
411 size_t OldStringLength = ByteLen();
413
414 UTF8Char* NewMemory = Reallocate(NewStringLength);
415 if (OtherStringView.ByteLen())
416 {
418 }
419
420 return *this;
421}
422
423template<class AllocatorType, typename... AllocatorArgsType>
424template<class OtherAllocatorType, typename... OtherAllocatorArgsType>
429
430template<class AllocatorType, typename... AllocatorArgsType>
436
437template<class AllocatorType, typename... AllocatorArgsType>
443
444template<class AllocatorType, typename... AllocatorArgsType>
445template<class OtherAllocatorType, typename... OtherAllocatorArgsType>
450
451template<class AllocatorType, typename... AllocatorArgsType>
457
458}
UE_FORCEINLINE_HINT TSharedRef< CastToType, Mode > StaticCastSharedRef(TSharedRef< CastFromType, Mode > const &InSharedRef)
Definition SharedPointer.h:127
T * New(FMemStackBase &Mem, int32 Count=1, int32 Align=DEFAULT_ALIGNMENT)
Definition MemStack.h:259
#define ULANG_FORCEINLINE
Definition Common.h:188
#define ULANG_ASSERTF(expr, format,...)
Definition Common.h:290
memcpy(InputBufferBase, BinkBlocksData, BinkBlocksSize)
Definition UTF8StringView.h:320
Definition UTF8StringView.h:15
void Reset()
Definition UTF8StringView.h:30
const UTF8Char * _Begin
Points to first byte.
Definition UTF8StringView.h:19
ULANG_FORCEINLINE int32_t InputByteIdxToDirectIdx(int32_t InIdx) const
Definition UTF8StringView.h:372
ULANG_FORCEINLINE bool InputByteIdxSpan(int32_t &InOutIdx, int32_t &InOutSpan) const
Definition UTF8StringView.h:392
const UTF8Char * _End
Points to the byte after the last byte.
Definition UTF8StringView.h:20
ULANG_FORCEINLINE bool IsFilled() const
Definition UTF8StringView.h:37
ULANG_FORCEINLINE UnicodeConstIterator begin() const
Definition UTF8StringView.h:362
ULANG_FORCEINLINE bool IsEmpty() const
Definition UTF8StringView.h:36
ULANG_FORCEINLINE UnicodeConstIterator end() const
Definition UTF8StringView.h:363
ULANG_FORCEINLINE int32_t ByteLen() const
Definition UTF8StringView.h:35
Definition UTF8StringBuilder.h:24
Definition UTF8String.h:21
ULANG_FORCEINLINE void Reset()
Definition UTF8String.h:40
ULANG_FORCEINLINE bool operator>=(const TUTF8String< OtherAllocatorType, OtherAllocatorArgsType... > &Other) const
Definition UTF8String.h:73
TUTF8String operator+(const CUTF8StringView &OtherStringView) const
Definition UTF8String.h:438
TUTF8String & operator+=(const CUTF8StringView &OtherStringView)
Definition UTF8String.h:409
TUTF8String(size_t ByteLength, InitializerFunctorType &&InitializerFunctor, AllocatorArgsType &&... AllocatorArgs)
ULANG_FORCEINLINE TUTF8String Replace(UTF8Char Old, UTF8Char New) const
Definition UTF8String.h:105
TUTF8String(TUTF8String &&Other)
Definition UTF8String.h:382
ULANG_FORCEINLINE bool operator!=(const TUTF8String< OtherAllocatorType, OtherAllocatorArgsType... > &Other) const
Definition UTF8String.h:65
TUTF8String operator+(const char *OtherNullTerminatedString) const
Definition UTF8String.h:452
ULANG_FORCEINLINE const char * operator*() const
Definition UTF8String.h:51
TUTF8String & operator=(TUTF8String &&Other)
Definition UTF8String.h:399
ULANG_FORCEINLINE bool InputByteIdxSpan(int32_t &InOutIdx, int32_t &InOutSpan) const
Definition UTF8String.h:189
ULANG_FORCEINLINE bool operator==(const CUTF8StringView &StringView) const
Definition UTF8String.h:74
TUTF8String & operator+=(const TUTF8String< OtherAllocatorType, OtherAllocatorArgsType... > &Other)
ULANG_FORCEINLINE UTF8Char * Resize(int32_t NewByteLen)
Definition UTF8String.h:42
TUTF8String(const TUTF8String< OtherAllocatorType, OtherAllocatorArgsType... > &Other, AllocatorArgsType &&... AllocatorArgs)
TUTF8String operator+(const TUTF8String< OtherAllocatorType, OtherAllocatorArgsType... > &Other) const
TUTF8String(AllocatorArgsType &&... AllocatorArgs, const char *NullTerminatedFormat, FormatterArgsType &&... FormatterArgs)
TUTF8String(AllocatorArgsType &&... AllocatorArgs, const char *NullTerminatedFormat, va_list FormatterArgs)
Definition UTF8String.h:322
ULANG_FORCEINLINE bool IsEmpty() const
Definition UTF8String.h:47
ULANG_FORCEINLINE const UTF8Char * AsUTF8() const
Definition UTF8String.h:49
ULANG_FORCEINLINE void Empty()
Definition UTF8String.h:41
static const TUTF8String & GetEmpty()
Definition UTF8String.h:262
ULANG_FORCEINLINE const CUTF8StringView & ToStringView() const
Definition UTF8String.h:96
TUTF8String(const char *NullTerminatedString, AllocatorArgsType &&... AllocatorArgs)
Definition UTF8String.h:283
ULANG_FORCEINLINE CUTF8StringView::UnicodeConstIterator begin() const
Definition UTF8String.h:100
ULANG_FORCEINLINE CUTF8StringView::UnicodeConstIterator end() const
Definition UTF8String.h:101
~TUTF8String()
Definition UTF8String.h:39
TUTF8String & operator=(const TUTF8String &Other)
Definition UTF8String.h:390
TUTF8String(const CUTF8StringView &StringView, AllocatorArgsType &&... AllocatorArgs)
Definition UTF8String.h:291
ULANG_FORCEINLINE bool operator<=(const TUTF8String< OtherAllocatorType, OtherAllocatorArgsType... > &Other) const
Definition UTF8String.h:69
TUTF8String & operator+=(const char *OtherNullTerminatedString)
Definition UTF8String.h:431
TUTF8String(const TUTF8String &Other)
Definition UTF8String.h:365
ULANG_FORCEINLINE bool IsFilled() const
Definition UTF8String.h:48
TUTF8String Replace(const CUTF8StringView &Old, const CUTF8StringView &New) const
Definition UTF8String.h:116
ULANG_FORCEINLINE bool operator>(const TUTF8String< OtherAllocatorType, OtherAllocatorArgsType... > &Other) const
Definition UTF8String.h:71
ULANG_FORCEINLINE int32_t InputByteIdxToDirectIdx(int32_t InIdx) const
Definition UTF8String.h:188
ULANG_FORCEINLINE bool operator==(const TUTF8String< OtherAllocatorType, OtherAllocatorArgsType... > &Other) const
Definition UTF8String.h:63
ULANG_FORCEINLINE const char * AsCString() const
Definition UTF8String.h:50
ULANG_FORCEINLINE int32_t ByteLen() const
Definition UTF8String.h:46
ULANG_FORCEINLINE const UTF8Char & operator[](int32_t ByteIndex) const
Definition UTF8String.h:54
ULANG_FORCEINLINE bool operator<(const TUTF8String< OtherAllocatorType, OtherAllocatorArgsType... > &Other) const
Definition UTF8String.h:67
TUTF8String()
Definition UTF8String.h:26
ULANG_FORCEINLINE bool operator!=(const CUTF8StringView &StringView) const
Definition UTF8String.h:75
uLang::CUTF8StringView CUTF8StringView
Definition VstNode.h:51
Definition VVMEngineEnvironment.h:23
@ DefaultInit
Definition Common.h:378
ENoInit
Enum used in constructors to indicate they should not initialize anything.
Definition Common.h:375
@ NoInit
Definition Common.h:375
uint8_t UTF8Char
UTF-8 octet.
Definition Unicode.h:20
ULANG_FORCEINLINE T && ForwardArg(typename TRemoveReference< T >::Type &Obj)
Definition References.h:115
ULANG_FORCEINLINE uint32_t GetTypeHash(const TArray< T > Array)
Definition Array.h:2132