UDocumentation UE5.7 10.02.2026 (Source)
API documentation for Unreal Engine 5.7
Unicode.h
Go to the documentation of this file.
1// Copyright Epic Games, Inc. All Rights Reserved.
2
3#pragma once
4
6
7#define UE_API ULANGCORE_API
8
9namespace uLang
10{
11// UTF-8 Context
12// - http://utf8everywhere.org/
13// - https://github.com/nemtrif/utfcpp
14
15// ASCII characters occupy the values 0 to 127, so any byte value less than ASCII_RANGE is a valid ASCII character.
16inline constexpr size_t ASCII_RANGE = 128;
// Element count of the per-byte lookup tables declared in CUnicode (_ASCIITable_Whitespace, _ASCIITable_Identifier).
17inline constexpr size_t BYTE_RANGE = 256;
18
21
24
31
33{
35 uint8_t NumUnits; // NumUnits==0 indicates an invalid codepoint.
36};
37
38
41{
42public:
43
44 // Tables for fast ASCII character classification (whitespace and identifier characters) [Any non-ASCII character will result in false.]
45 static UE_API const bool _ASCIITable_Whitespace[BYTE_RANGE]; // space, tab, newline, carriage return
46 static UE_API const bool _ASCIITable_Identifier[BYTE_RANGE]; // A-Z, a-z, 1-9, _ (NOTE(review): "1-9" presumably should read "0-9" - confirm against the table definition)
47
// Returns true when Ch is an ASCII decimal digit ('0'..'9').
// The subtraction is done in unsigned arithmetic, so bytes below '0' wrap to a large value and the single `< 10u` test checks both bounds.
49 ULANG_FORCEINLINE static bool IsDigitASCII(const UTF8Char Ch) { return (unsigned(Ch) - unsigned('0')) < 10u; }
// Returns true when Ch is an ASCII uppercase letter ('A'..'Z').
// Bytes below 'A' wrap to a large unsigned value, so the single `< 26u` test checks both bounds.
50 ULANG_FORCEINLINE static bool IsUpperASCII(const UTF8Char Ch) { return (unsigned(Ch) - unsigned('A')) < 26u; }
// Returns true when Ch is an ASCII lowercase letter ('a'..'z').
// Bytes below 'a' wrap to a large unsigned value, so the single `< 26u` test checks both bounds.
51 ULANG_FORCEINLINE static bool IsLowerASCII(const UTF8Char Ch) { return (unsigned(Ch) - unsigned('a')) < 26u; }
// Returns true when Ch is an ASCII letter of either case ('a'..'z' or 'A'..'Z').
// Each case range is checked with one unsigned wrap-around comparison.
52 ULANG_FORCEINLINE static bool IsAlphaASCII(const UTF8Char Ch) { return ((unsigned(Ch) - unsigned('a')) < 26u) || ((unsigned(Ch) - unsigned('A')) < 26u); }
// Converts an ASCII uppercase letter to lowercase by adding the case offset ('a' - 'A').
// Any byte that IsUpperASCII rejects is returned unchanged.
53 ULANG_FORCEINLINE static UTF8Char ToLower_ASCII(const UTF8Char Ch) { return IsUpperASCII(Ch) ? Ch + ('a' - 'A') : Ch; }
// Converts an ASCII lowercase letter to uppercase by subtracting the case offset ('a' - 'A').
// Any byte that IsLowerASCII rejects is returned unchanged.
54 ULANG_FORCEINLINE static UTF8Char ToUpper_ASCII(const UTF8Char Ch) { return IsLowerASCII(Ch) ? Ch - ('a' - 'A') : Ch; }
55
58
62
63
64private:
65
// Out-of-line decoder that the inline UTF-8 decode fast path calls when the first byte is not below ASCII_RANGE.
66 static UE_API SUniCodePointLength DecodeUTF8NonASCII(const UTF8Char * Text, size_t TextByteLength);
67
// Out-of-line identifier classification; the inline identifier checks fall through to these for code points their ASCII branch does not handle.
68 static UE_API bool IsIdentifierStartNonASCII(UniCodePoint CodePoint);
69 static UE_API bool IsIdentifierTailNonASCII(UniCodePoint CodePoint);
70
71};
72
73//=======================================================================================
74// CUnicode Inline Methods
75//=======================================================================================
76
77// This inline function is optimized for the most common case, in which the code point is ASCII.
78// An actual (out-of-line) function call is made only for non-ASCII code points.
80{
81 ULANG_ASSERTF(TextByteLength > 0, "Can't decode UTF8 from empty string!");
82
83 // If ASCII, deal with it right here
84 UniCodePoint FirstByte = *Text;
85 if (FirstByte < ASCII_RANGE)
86 {
87 // Handle all ASCII characters inline
88 return { FirstByte, 1 };
89 }
90
91 // Not ASCII - call the professionals
92 return DecodeUTF8NonASCII(Text, TextByteLength);
93}
94
95// This inline function is optimized for the most common case, in which the code point is ASCII.
96// An actual (out-of-line) function call is made only for non-ASCII code points.
98{
99 // If ASCII, deal with it right here
101 {
102 // Handle all ASCII characters inline
103 return ((CodePoint >= 'A' && CodePoint <= 'z' && (CodePoint <= 'Z' || CodePoint >= 'a')) || CodePoint == '_');
104 }
105
106 // Not ASCII - call the professionals
107 return IsIdentifierStartNonASCII(CodePoint);
108}
109
110// This inline function is optimized for the most common case, in which the code point is ASCII.
111// An actual (out-of-line) function call is made only for non-ASCII code points.
113{
114 // If ASCII, deal with it right here
116 {
117 // Handle all ASCII characters inline
119 }
120
121 // Not ASCII - call the professionals
122 return IsIdentifierTailNonASCII(CodePoint);
123}
124
125} // uLang namespace
126
127#undef UE_API
UE_FORCEINLINE_HINT TSharedRef< CastToType, Mode > StaticCastSharedRef(TSharedRef< CastFromType, Mode > const &InSharedRef)
Definition SharedPointer.h:127
#define UE_API
Definition SColorGradingComponentViewer.h:12
#define ULANG_FORCEINLINE
Definition Common.h:188
#define ULANG_ASSERTF(expr, format,...)
Definition Common.h:290
Helper class providing useful unicode functionality.
Definition Unicode.h:41
static ULANG_FORCEINLINE bool IsWhitespaceASCII(const UTF8Char Ch)
Definition Unicode.h:48
static ULANG_FORCEINLINE UTF8Char ToLower_ASCII(const UTF8Char Ch)
Definition Unicode.h:53
static bool IsIdentifierTail(UniCodePoint CodePoint)
Identifier continuation/middle?
Definition Unicode.h:112
static ULANG_FORCEINLINE bool IsLowerASCII(const UTF8Char Ch)
Definition Unicode.h:51
static SUniCodePointLength DecodeUTF8(const UTF8Char *Text, size_t TextByteLength)
Definition Unicode.h:79
static UE_API SUTF8CodePoint EncodeUTF8(UniCodePoint CodePoint)
Definition Unicode.cpp:222
static ULANG_FORCEINLINE bool IsUpperASCII(const UTF8Char Ch)
Definition Unicode.h:50
static ULANG_FORCEINLINE bool IsAlphaASCII(const UTF8Char Ch)
Definition Unicode.h:52
static ULANG_FORCEINLINE UTF8Char ToUpper_ASCII(const UTF8Char Ch)
Definition Unicode.h:54
static ULANG_FORCEINLINE bool IsDigitASCII(const UTF8Char Ch)
Definition Unicode.h:49
static UE_API const bool _ASCIITable_Whitespace[BYTE_RANGE]
Definition Unicode.h:45
static UE_API const bool _ASCIITable_Identifier[BYTE_RANGE]
Definition Unicode.h:46
static bool IsIdentifierStart(UniCodePoint CodePoint)
uLang-specific detection of identifier code points
Definition Unicode.h:97
Definition VVMEngineEnvironment.h:23
constexpr size_t BYTE_RANGE
Definition Unicode.h:17
constexpr size_t ASCII_RANGE
Definition Unicode.h:16
uint8_t UTF8Char
UTF-8 octet.
Definition Unicode.h:20
uint32_t UniCodePoint
UTF-32 character / code point.
Definition Unicode.h:23
Definition Unicode.h:33
uint8_t NumUnits
Definition Unicode.h:35
UTF8Char Units[4]
Definition Unicode.h:34
SUniCodePointLength: pair of a code point and its length in bytes in UTF-8.
Definition Unicode.h:27
UniCodePoint _CodePoint
Definition Unicode.h:28
uint32_t _ByteLengthUTF8
Definition Unicode.h:29