UDocumentation UE5.7 10.02.2026 (Source)
API documentation for Unreal Engine 5.7
GenericPlatformString.h
Go to the documentation of this file.
1// Copyright Epic Games, Inc. All Rights Reserved.
2
3#pragma once
4
5#include "CoreTypes.h"
10#include "Traits/IsCharType.h"
12
13#include <type_traits>
14
// Out-of-line (CORE_API) helpers for conversions that involve UTF8CHAR. Only these encoding
// pairs are declared: WIDECHAR / UCS2CHAR / UTF32CHAR -> UTF8CHAR, and
// UTF8CHAR -> ANSICHAR / WIDECHAR / UCS2CHAR. Each pair has two overloads: one that takes
// only Src, and one that also takes an explicit SrcLen.
15namespace UE::Core::Private
16{
// GetConvertedLength: returns an int32 length for converting Src to the encoding selected by
// the type of Dest.
// NOTE(review): the only caller visible in this header passes a null Dest and adds 1 to the
// result, so the returned length presumably excludes the terminator - confirm in the .cpp.
17 // The Dest parameter is just used for overload resolution
18 CORE_API int32 GetConvertedLength(const UTF8CHAR* Dest, const WIDECHAR* Src);
19 CORE_API int32 GetConvertedLength(const UTF8CHAR* Dest, const WIDECHAR* Src, int32 SrcLen);
20 CORE_API int32 GetConvertedLength(const UTF8CHAR* Dest, const UCS2CHAR* Src);
21 CORE_API int32 GetConvertedLength(const UTF8CHAR* Dest, const UCS2CHAR* Src, int32 SrcLen);
22 CORE_API int32 GetConvertedLength(const UTF8CHAR* Dest, const UTF32CHAR* Src);
23 CORE_API int32 GetConvertedLength(const UTF8CHAR* Dest, const UTF32CHAR* Src, int32 SrcLen);
24 CORE_API int32 GetConvertedLength(const ANSICHAR* Dest, const UTF8CHAR* Src);
25 CORE_API int32 GetConvertedLength(const ANSICHAR* Dest, const UTF8CHAR* Src, int32 SrcLen);
26 CORE_API int32 GetConvertedLength(const WIDECHAR* Dest, const UTF8CHAR* Src);
27 CORE_API int32 GetConvertedLength(const WIDECHAR* Dest, const UTF8CHAR* Src, int32 SrcLen);
28 CORE_API int32 GetConvertedLength(const UCS2CHAR* Dest, const UTF8CHAR* Src);
29 CORE_API int32 GetConvertedLength(const UCS2CHAR* Dest, const UTF8CHAR* Src, int32 SrcLen);
30
// Convert: takes a destination buffer (Dest, DestLen) and a source, and returns a pointer of
// the destination character type. The overload set mirrors GetConvertedLength above.
// NOTE(review): callers in this header either forward the result unchanged or null-check it
// before writing a terminator through it, which suggests nullptr signals failure and a
// non-null result points just past the converted output - confirm in the .cpp.
31 CORE_API UTF8CHAR* Convert(UTF8CHAR* Dest, int32 DestLen, const WIDECHAR* Src);
32 CORE_API UTF8CHAR* Convert(UTF8CHAR* Dest, int32 DestLen, const WIDECHAR* Src, int32 SrcLen);
33 CORE_API UTF8CHAR* Convert(UTF8CHAR* Dest, int32 DestLen, const UCS2CHAR* Src);
34 CORE_API UTF8CHAR* Convert(UTF8CHAR* Dest, int32 DestLen, const UCS2CHAR* Src, int32 SrcLen);
35 CORE_API UTF8CHAR* Convert(UTF8CHAR* Dest, int32 DestLen, const UTF32CHAR* Src);
36 CORE_API UTF8CHAR* Convert(UTF8CHAR* Dest, int32 DestLen, const UTF32CHAR* Src, int32 SrcLen);
37 CORE_API ANSICHAR* Convert(ANSICHAR* Dest, int32 DestLen, const UTF8CHAR* Src);
38 CORE_API ANSICHAR* Convert(ANSICHAR* Dest, int32 DestLen, const UTF8CHAR* Src, int32 SrcLen);
39 CORE_API WIDECHAR* Convert(WIDECHAR* Dest, int32 DestLen, const UTF8CHAR* Src);
40 CORE_API WIDECHAR* Convert(WIDECHAR* Dest, int32 DestLen, const UTF8CHAR* Src, int32 SrcLen);
41 CORE_API UCS2CHAR* Convert(UCS2CHAR* Dest, int32 DestLen, const UTF8CHAR* Src);
42 CORE_API UCS2CHAR* Convert(UCS2CHAR* Dest, int32 DestLen, const UTF8CHAR* Src, int32 SrcLen);
43}
44
45// These will be moved inside GenericPlatformString.cpp when the platform layer handles UTF-16
46// instead of StringConv.h.
// Inclusive bounds of the high (leading) surrogate range, as 16-bit values.
47#define HIGH_SURROGATE_START_CODEPOINT ((uint16)0xD800)
48#define HIGH_SURROGATE_END_CODEPOINT ((uint16)0xDBFF)
// Inclusive bounds of the low (trailing) surrogate range, as 16-bit values.
49#define LOW_SURROGATE_START_CODEPOINT ((uint16)0xDC00)
50#define LOW_SURROGATE_END_CODEPOINT ((uint16)0xDFFF)
// 32-bit bounds 0x10000..0x10FFFF.
// NOTE(review): presumably the code points that need a surrogate pair in UTF-16 - the use
// sites are not in this header, confirm there.
51#define ENCODED_SURROGATE_START_CODEPOINT ((uint32)0x10000)
52#define ENCODED_SURROGATE_END_CODEPOINT ((uint32)0x10FFFF)
53
// Character literal '?'.
// NOTE(review): presumably the substitute written for characters that cannot be converted -
// the statements that use it are not visible in this listing.
54#define UNICODE_BOGUS_CHAR_CODEPOINT '?'
// Compile-time guard: the bogus character must be no larger than an ANSICHAR and lie in 32..127.
55static_assert(sizeof(UNICODE_BOGUS_CHAR_CODEPOINT) <= sizeof(ANSICHAR) && (UNICODE_BOGUS_CHAR_CODEPOINT) >= 32 && (UNICODE_BOGUS_CHAR_CODEPOINT) <= 127, "The Unicode Bogus character point is expected to fit in a single ANSICHAR here");
56
61{
// Compile-time/run-time test of whether a single code point held in a SourceEncoding value
// can be represented in DestEncoding.
//
// Template parameters: DestEncoding first, then SourceEncoding (deduced from the argument).
// Both must be character types and SourceEncoding must be a fixed-width encoding; each
// requirement is enforced by a static_assert below.
// Parameter: Codepoint - assumed to already be a valid code point (see the first comment).
// Returns: true if the conversion is possible, false otherwise.
//
// NOTE(review): this listing omits lines 79 and 84, which carry the conditions of the first
// two branches; only their bodies and original comments are visible here.
68 template <typename DestEncoding, typename SourceEncoding>
69 static constexpr bool CanConvertCodepoint(SourceEncoding Codepoint)
70 {
71 // It is assumed that the incoming codepoint is already valid and we're only testing if it can be converted to DestEncoding.
72
73 static_assert(TIsCharType<SourceEncoding>::Value, "Source encoding is not a char type");
74 static_assert(TIsCharType<DestEncoding >::Value, "Destination encoding is not a char type");
75
76 // This is only defined for fixed-width encodings, because codepoints cannot be represented in a single variable-width code unit.
77 static_assert(TIsFixedWidthCharEncoding_V<SourceEncoding>, "Source encoding is not fixed-width");
78
80 {
81 // Simple conversions mean conversion is always possible
82 return true;
83 }
85 {
86 // Converting all codepoints to a variable-width encoding should always be possible
87 return true;
88 }
89 else if constexpr (std::is_same_v<DestEncoding, ANSICHAR>)
90 {
91 return (uint32)Codepoint <= 0x7F; // ANSICHAR destination: only values up to 0x7F are accepted
92 }
93 else
94 {
95 // The logic above should hopefully mean this branch is only taken for UTF32CHAR->UCS2CHAR.
96 // There's a variety of '16-bit' char types between platforms though, so let's just test sizes.
97 static_assert(sizeof(SourceEncoding) == 4 && sizeof(DestEncoding) == 2, "Unimplemented conversion");
98
99 // Can't encode more than 16-bit in UCS-2
100 return (uint32)Codepoint <= 0xFFFF;
101 }
102 }
103
104
// Declaration only (no body in this header): returns a TCHAR string for the Encoding
// template argument.
// NOTE(review): presumably a printable name of the encoding type - confirm at the definition.
110 template <typename Encoding>
111 static const TCHAR* GetEncodingTypeName();
112
// Returns an encoding name as an ANSI string literal, selected at preprocessing time:
// "UTF-32LE" when PLATFORM_TCHAR_IS_4_BYTES is non-zero, otherwise "UTF-16LE".
// NOTE(review): the macro name suggests this is the encoding of TCHAR - confirm.
113 static const ANSICHAR* GetEncodingName()
114 {
115#if PLATFORM_TCHAR_IS_4_BYTES
116 return "UTF-32LE";
117#else
118 return "UTF-16LE";
119#endif
120 }
121
// Compile-time flag; this implementation fixes it to true.
125 static constexpr bool IsUnicodeEncoded = true;
126
127
// Convert (null-terminated source). Per this page's index the signature on the omitted
// listing line 138 is:
//   static DestEncoding* Convert(DestEncoding* Dest, int32 DestSize, const SourceEncoding* Src)
// Note the template parameter order here is SourceEncoding, DestEncoding - the reverse of
// CanConvertCodepoint and ConvertedLength.
//
// The body has three compile-time branches. The conditions selecting the first two (listing
// lines 140 and 157) are not shown, and several statements inside the second branch
// (161, 178, 192, 197, 199) and the third (211) are omitted as well.
137 template <typename SourceEncoding, typename DestEncoding>
139 {
// Branch 1: element-by-element cast copy, terminator included.
141 {
142 for (;;)
143 {
144 if (DestSize == 0)
145 {
146 return nullptr; // destination ran out before the terminator was written
147 }
148
149 if (!(*Dest++ = (DestEncoding)*Src++))
150 {
151 return Dest; // the zero terminator was just stored; Dest is one past it
152 }
153
154 --DestSize;
155 }
156 }
// Branch 2: same cast copy, followed by an optional second pass over the output.
158 {
// Remember where the buffers started so the second pass can revisit them.
159 DestEncoding* DestCopy = Dest;
160 const SourceEncoding* SrcCopy = Src;
162
// NOTE(review): nothing visible sets bInvalidChars to true; the omitted line 178 sits
// inside the copy loop and presumably does - confirm against the real header.
163 bool bInvalidChars = false;
164 for (;;)
165 {
166 if (DestSize == 0)
167 {
168 Dest = nullptr; // out of room: the function will return nullptr at line 207
169 break;
170 }
171
172 SourceEncoding SrcCh = *Src++;
173 *Dest++ = (DestEncoding)SrcCh;
174 if (!SrcCh)
175 {
176 break; // terminator copied
177 }
179
180 --DestSize;
181 }
182
183 if (bInvalidChars)
184 {
// Second pass from the start of the output. DestSizeCopy and SrcCh are declared on
// omitted lines (161, 192), and the body of the inner braces (198-200) is not shown.
185 for (;;)
186 {
187 if (DestSizeCopy == 0)
188 {
189 break;
190 }
191
193 if (!SrcCh)
194 {
195 break;
196 }
198 {
200 }
201 ++DestCopy;
202
203 --DestSizeCopy;
204 }
205 }
206
207 return Dest;
208 }
209 else
210 {
// Fallback branch: Result is assigned on the omitted line 211. If it is non-null, a zero
// terminator is written through it and the pointer past that terminator is returned;
// a null Result is returned unchanged.
212 if (Result)
213 {
214 *Result++ = (DestEncoding)0;
215 }
216 return Result;
217 }
218 }
219
// Convert (explicit source length). Per this page's index the signature on the omitted
// listing line 232 is:
//   static DestEncoding* Convert(DestEncoding* Dest, int32 DestSize, const SourceEncoding* Src, int32 SrcSize)
// Template parameter order is SourceEncoding, DestEncoding.
//
// Four compile-time branches; the conditions selecting the first three (listing lines 234,
// 243, 254) are not shown. No terminator is appended by any visible statement.
// Returns nullptr when DestSize < SrcSize, otherwise a pointer just past the written output.
231 template <typename SourceEncoding, typename DestEncoding>
233 {
// Branch 1: block copy. On insufficient space nothing is written.
235 {
236 if (DestSize < SrcSize)
237 {
238 return nullptr;
239 }
240
// Copies SrcSize * sizeof(SourceEncoding) bytes and returns Dest advanced by SrcSize elements.
241 return (DestEncoding*)Memcpy(Dest, Src, SrcSize * sizeof(SourceEncoding)) + SrcSize;
242 }
// Branch 2: per-element cast copy of min(DestSize, SrcSize) elements. Unlike branch 1, the
// truncated prefix IS written before nullptr is returned for DestSize < SrcSize.
244 {
245 const int32 Size = DestSize <= SrcSize ? DestSize : SrcSize;
246 for (int I = 0; I < Size; ++I)
247 {
248 SourceEncoding SrcCh = Src[I];
249 Dest[I] = (DestEncoding)SrcCh;
250 }
251
252 return DestSize < SrcSize ? nullptr : Dest + Size;
253 }
// Branch 3: as branch 2, plus an optional second pass over the output.
// NOTE(review): the statement that sets bInvalidChars (omitted line 262) and the body of the
// second pass (269-271) are not visible in this listing.
255 {
256 const int32 Size = DestSize <= SrcSize ? DestSize : SrcSize;
257 bool bInvalidChars = false;
258 for (int I = 0; I < Size; ++I)
259 {
260 SourceEncoding SrcCh = Src[I];
261 Dest[I] = (DestEncoding)SrcCh;
263 }
264
265 if (bInvalidChars)
266 {
267 for (int I = 0; I < Size; ++I)
268 {
270 {
272 }
273 }
274 }
275
276 return DestSize < SrcSize ? nullptr : Dest + Size;
277 }
278 else
279 {
// Everything else is delegated to the out-of-line overloads in UE::Core::Private.
280 return UE::Core::Private::Convert(Dest, DestSize, Src, SrcSize);
281 }
282 }
283
284
// ConvertedLength (null-terminated source). Per this page's index the signature on the
// omitted listing line 293 is:
//   static int32 ConvertedLength(const SourceEncoding* Src)
// Template parameter order is DestEncoding, SourceEncoding.
//
// Both branches return a count that has 1 added for the terminator. The condition selecting
// the first branch (listing line 295) is not shown.
292 template <typename DestEncoding, typename SourceEncoding>
294 {
// Branch 1: count source elements up to (not including) the zero terminator.
296 {
297 int32 Result = 0;
298 while (*Src)
299 {
300 ++Src;
301 ++Result;
302 }
303 return Result + 1; // +1 accounts for the terminator
304 }
305 else
306 {
// The null DestEncoding* carries no data; it only selects the overload for the destination type.
307 return UE::Core::Private::GetConvertedLength((DestEncoding*)nullptr, Src) + 1;
308 }
309 }
310
319 template <typename DestEncoding, typename SourceEncoding>
331
341
342private:
// Private byte-copy helper, declared here and exported via CORE_API (no body in this header).
// The sized Convert's block-copy branch calls it and uses the return value as the destination pointer.
351 static CORE_API void* Memcpy(void* Dest, const void* Src, SIZE_T Count);
352};
FPlatformTypes::CHAR16 UCS2CHAR
A 16-bit character containing a UCS2 (Unicode, 16-bit, fixed-width) code unit, used for compatibility...
Definition Platform.h:1139
FPlatformTypes::SIZE_T SIZE_T
An unsigned integer the same size as a pointer, the same as UPTRINT.
Definition Platform.h:1150
FPlatformTypes::UTF32CHAR UTF32CHAR
A 32-bit character containing a UTF32 (Unicode, 32-bit, fixed-width) code unit.
Definition Platform.h:1143
FPlatformTypes::TCHAR TCHAR
Either ANSICHAR or WIDECHAR, depending on whether the platform supports wide characters or the requir...
Definition Platform.h:1135
FPlatformTypes::WIDECHAR WIDECHAR
A wide character. Normally a signed type.
Definition Platform.h:1133
FPlatformTypes::int32 int32
A 32-bit signed integer.
Definition Platform.h:1125
FPlatformTypes::UTF8CHAR UTF8CHAR
An 8-bit character containing a UTF8 (Unicode, 8-bit, variable-width) code unit.
Definition Platform.h:1137
FPlatformTypes::ANSICHAR ANSICHAR
An ANSI character. Normally a signed type.
Definition Platform.h:1131
UE_FORCEINLINE_HINT TSharedRef< CastToType, Mode > StaticCastSharedRef(TSharedRef< CastFromType, Mode > const &InSharedRef)
Definition SharedPointer.h:127
#define UNICODE_BOGUS_CHAR_CODEPOINT
Definition GenericPlatformString.h:54
uint32 Size
Definition VulkanMemory.cpp:4034
uint32_t uint32
Definition binka_ue_file_header.h:6
implementation
Definition PlayInEditorLoadingScope.h:8
int32 GetConvertedLength(const UTF8CHAR *, const WIDECHAR *Source)
Definition GenericPlatformString.cpp:522
UTF8CHAR * Convert(UTF8CHAR *Dest, int32 DestLen, const WIDECHAR *Src)
Definition GenericPlatformString.cpp:595
Definition GenericPlatformStricmp.h:16
Definition GenericPlatformString.h:61
static const TCHAR * GetEncodingTypeName()
static int32 ConvertedLength(const SourceEncoding *Src, int32 SrcSize)
Definition GenericPlatformString.h:320
static constexpr bool CanConvertCodepoint(SourceEncoding Codepoint)
Definition GenericPlatformString.h:69
static int32 ConvertedLength(const SourceEncoding *Src)
Definition GenericPlatformString.h:293
static CORE_API int32 Strncmp(const ANSICHAR *String1, const ANSICHAR *String2, SIZE_T Count)
Definition GenericPlatformString.cpp:719
static DestEncoding * Convert(DestEncoding *Dest, int32 DestSize, const SourceEncoding *Src, int32 SrcSize)
Definition GenericPlatformString.h:232
static DestEncoding * Convert(DestEncoding *Dest, int32 DestSize, const SourceEncoding *Src)
Definition GenericPlatformString.h:138
static const ANSICHAR * GetEncodingName()
Definition GenericPlatformString.h:113
static constexpr bool IsUnicodeEncoded
Definition GenericPlatformString.h:125
Definition IsCharType.h:10