UDocumentation UE5.7 10.02.2026 (Source)
API documentation for Unreal Engine 5.7
VerseGrammar.h
Go to the documentation of this file.
1// Copyright Epic Games, Inc. All Rights Reserved.
2// Dependency-free allocation-free single-header Verse grammar library.
3//--------------------------------------------------------------------------------------------------------------------------------------------------------------
4
5#pragma once
6
7// The #if TIM are special cases for using the library outside of UE5 and for handling specification alignment issues
8// that aren't ready for UE5 yet.
9#define TIM 0
10
11#if !TIM
12#include <utility>
13#include <stdint.h>
14#endif
15
16namespace Verse {
17namespace Grammar {
18
// Default nesting limits. Either macro may be defined before this header is included to override it.
// NOTE(review): presumably these cap the parser's ExprDepth / CommentDepth counters; the checks
// themselves are not part of this listing, so confirm against the code that uses them.
19#ifndef VERSE_MAX_EXPR_DEPTH
20#define VERSE_MAX_EXPR_DEPTH 100
21#endif
22
23#ifndef VERSE_MAX_INDCMT_DEPTH
24#define VERSE_MAX_INDCMT_DEPTH 3
25#endif
26
27// Macros.
// GRAMMAR_ASSERT(c): calls Verse::Grammar::Err() when `c` is false. The `#if` condition is the literal
// `false` (NDEBUG is commented out), so the checking definition in the #else branch is the one compiled.
28#if /*NDEBUG*/false
29#define GRAMMAR_ASSERT(c) (void)(0)
30#else
31#define GRAMMAR_ASSERT(c) ((c)? (void)0: Verse::Grammar::Err())
32#endif
// GRAMMAR_RUN(e): evaluates `e` once; if the result tests false, returns its GetError() from the enclosing function.
33#define GRAMMAR_RUN(e) {auto GrammarTemp=(e); if(!GrammarTemp) return GrammarTemp.GetError();}
// GRAMMAR_SET(r,e): as GRAMMAR_RUN, and on success assigns the dereferenced result to the existing lvalue `r`.
34#define GRAMMAR_SET(r,e) {auto GrammarTemp=(e); if(!GrammarTemp) return GrammarTemp.GetError(); r=*GrammarTemp;}
// GRAMMAR_LET(r,e): as GRAMMAR_SET, but declares `r` (and a helper named `r##Let`) in the current scope.
// It expands to several statements with no enclosing braces, so it must not be used as the body of an unbraced if/for.
35#define GRAMMAR_LET(r,e) auto r##Let=(e); if(!r##Let) return r##Let.GetError(); auto r=*r##Let;
36
37// Natural numbers and characters.
// Integer aliases used throughout the grammar library.
using int64  = long long;           // signed 64-bit, used for relative offsets
using nat8   = unsigned char;       // one octet; also used to view a char8 as unsigned
using nat16  = unsigned short;
using nat32  = unsigned int;
using nat64  = unsigned long long;
using nat    = unsigned long long;  // default natural-number type for sizes, counts and indices
// Unicode code point type. The UTF-8 decoder in this header builds `char32(...)` values,
// so the alias has to be declared together with the other basic types.
using char32 = char32_t;
45
// char8 is the UTF-8 code unit type used for all source text handled by this library.
// It is `char8_t` when the compiler provides it and plain `char` otherwise; in the latter case the
// type may be signed, which is why range comparisons elsewhere in this file cast to nat8 first.
46#if defined(__cpp_char8_t)
47 using char8 = char8_t;
48#else
49 // `char8_t` is natively defined since C++20 unless disabled.
50 // If not available, use `char` as a replacement - while `char8_t` is supposed to be unsigned, `unsigned char` is not compatible with u8"" literals.
51 // Though all greater / less than comparisons need to ensure that an unsigned 8-bit number is used.
52 // [The alternative is to have all string literals wrapped in a cast (or macro of a cast) and use `unsigned char`.]
53 // See:
54 // char8_t: A type for UTF-8 characters and strings (Revision 6) - https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0482r6.html
55 // char8_t backward compatibility remediation - https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p1423r3.html
56 using char8 = char;
57#endif
58
59// Basic functions.
60template<class t,nat n> constexpr nat ArraySize(t(&)[n]) {return n;}
61
62// Error.
// Fatal-error sink used by GRAMMAR_ASSERT: traps into the debugger / aborts and never returns.
// The trap sits inside an endless loop so the function cannot return even if execution is resumed.
[[noreturn]] inline void Err() //-V1082
{
    for(;;) {
#if defined(_MSC_VER)
        __debugbreak();
#else
        __builtin_trap();
#endif
    }
}
71
72// Trivial type.
// Empty placeholder type. In this file it fills the error slot of `result<cursor,nothing>` and the
// value slot of `result_t<nothing>`, i.e. places where only success or failure needs to be carried.
73struct nothing {};
74
75// Precedence.
77
78// Associativity.
80
81// Block form.
// Recorded per block in `block::Form`; stored as a single byte.
82enum class form: nat8 {List, Commas};
83
84// Block punctuation.
86
87// Places specializing capture_t generation.
89
90// Modes for calling: none (error if instantiated), of (failure disallowed), at (failure allowed), with (macro).
// NOTE(review): the preceding comment names the modes none/of/at/with while the enumerators are
// None/Open/Closed/With; presumably Open and Closed correspond to "of" and "at" in that order - confirm.
91enum class mode {None,Open,Closed,With};
92
93// Sets a variable on construction, and restores its previous value on destruction.
// Scope guard: stores a pointer to the guarded variable together with a copy of its previous value,
// overwrites the variable with `new_value`, and writes the saved value back when the guard is destroyed.
// NOTE(review): the constructor and destructor declarator lines (listing lines 95 and 101) are absent
// from this extract; only their initializer list and bodies are visible below.
94template<typename t> struct scoped_guard {
96 : guard_variable(&_guard_variable)
97 , old_value(_guard_variable)
98 {
99 *guard_variable = new_value;
100 }
102 {
 // Restore only when a variable is attached.
103 if (guard_variable)
104 *guard_variable = old_value;
105 }
106private:
107 t* guard_variable; // Variable being guarded.
108 t old_value; // Value to write back on destruction.
109};
110
111// Text spans passed around by the parser.
// Non-owning view of a run of char8 code units, delimited by the half-open range [Start,Stop).
// NOTE(review): the two-pointer constructor that the constructors below delegate to (listing line 115)
// is absent from this extract.
112struct text {
113 const char8 *Start, *Stop;
 // Empty text: both pointers null.
114 constexpr text(): Start(nullptr), Stop(nullptr) {}
 // From a NUL-terminated string: Stop is advanced to the terminator, which is not part of the text.
116 constexpr text(const char8* Start0): text(Start0,Start0) {while(*Stop) ++Stop;}
 // When char8 is a distinct type (char8_t), also accept ordinary NUL-terminated `char` strings.
117 #if defined(__cpp_char8_t)
118 text(const char* Start0) : text(reinterpret_cast<const char8*>(Start0), (const char8*)Start0) { while (*Stop) ++Stop; }
119 #endif
 // Code unit at index i; asserts that i lies before Stop.
120 constexpr char8 operator[](nat i) const {GRAMMAR_ASSERT(Start+i<Stop); return Start[i];}
 // True when the text is non-empty.
121 explicit operator bool() const {return Start!=Stop;}
122};
123constexpr nat Length(const text& Text) {
124 return Text.Stop-Text.Start;
125}
126inline bool operator==(const text& as,const text& bs) {
127 if(Length(as)!=Length(bs))
128 return 0;
129 for(nat i=0; i<Length(as); i++)
130 if(as.Start[i]!=bs.Start[i])
131 return 0;
132 return 1;
133}
134inline bool operator!=(const text& as,const text& bs) {
135 return !(as==bs);
136}
137
138// A snippet of text describing its location.
// A span of source text together with its start/stop line and column.
// NOTE(review): the member declarations (listing lines 140-142) and whatever follows the friend
// declaration (lines 150-151) are absent from this extract; the members named in the default
// constructor below are Text, StartLine, StopLine, StartColumn and StopColumn.
139struct snippet {
 // Empty snippet: null text and zeroed position fields.
143 snippet(): Text(nullptr,nullptr), StartLine(0), StopLine(0), StartColumn(0), StopColumn(0) {}
 // True when the underlying text is non-empty.
144 explicit operator bool() const {
145 return bool(Text);
146 }
147private:
148 // Private to ensure all non-empty snippets are within the string passed to the parser.
149 friend struct parser_base;
152};
153
154// Verse blocks.
// Description of one parsed block, parameterized on the generator's syntax-list and capture types.
// NOTE(review): listing lines 156-157 are absent from this extract. `block_t` later inherits this
// type's constructors, so presumably they declare one - confirm against the full header.
155template<class syntaxes_t,class capture_t> struct block {
158 snippet BlockSnippet; // Snippet of the whole block.
159 syntaxes_t Specifiers; // Specifiers.
160 capture_t TokenLeading; // If Token, the Scan before it.
161 text Token; // Token preceding opening punctuation.
162 capture_t PunctuationLeading; // After token, before opening punctuation; present only if Punctuation.
163 punctuation Punctuation; // Punctuation wrapping the list.
164 form Form; // Commas or List.
165 syntaxes_t Elements; // Elements.
166 capture_t ElementsTrailing; // Scan between elements and closing punctuation or end.
167 capture_t PunctuationTrailing; // If Punctuation, this holds Space & NewLine trailing it.
168};
169
170// Results consisting of either a value or an error.
// Holds either a value_t (success) or an error_t (failure) in a union; `Success` records which
// member is alive. Converts to bool so it can be tested directly, as the GRAMMAR_* macros do.
171template<class value_t,class error_t> struct result {
 // Success from anything a value_t can be constructed from; the defaulted template argument removes
 // this overload when `value_t(u)` is not a valid expression.
172 template<class u,class=decltype(value_t(*(u*)nullptr))> result(const u& Value0): Value(Value0), Success(true) {}
173#if !TIM
 // Rvalue counterpart that moves the argument in. When `u` deduces to an lvalue reference the
 // `(u*)` in the constraint is ill-formed, so this overload only participates for rvalues.
174 template<class u, class = decltype(value_t(*(u*)nullptr))> result(u&& Value0) : Value(std::move(Value0)), Success(true) {}
175#endif
 // Failure with a default-constructed error; only available when error_t is default-constructible.
176 template<class t0=error_t,class=decltype(t0())> result(): Error(), Success(false) {}
 // Failure carrying a copy of the given error.
177 result(const error_t& Error0): Error(Error0), Success(false) {}
 // Copy: placement-constructs whichever union member is alive in Other.
178 result(const result& Other): Success(Other.Success) {
179 if(Other.Success)
180 new(&Value)value_t(Other.Value);
181 else
182 new(&Error)error_t(Other.Error);
183 }
 // Destroys the live member.
184 ~result() {if(Success) {Value.~value_t();} else {Error.~error_t();}}
 // True on success.
185 operator bool() const {return Success;}
 // Copy assignment: destroys the current contents and copy-constructs in place; self-assignment is a no-op.
186 result& operator=(const result& R) {if(this!=&R) {this->~result(); new(this)result(R);} return *this;}
 // Value access; asserts success.
187 const value_t& operator*() const {GRAMMAR_ASSERT(Success); return Value;}
188 value_t* operator->() {GRAMMAR_ASSERT(Success); return &Value;}
 // Error access; asserts failure.
189 const error_t& GetError() const {GRAMMAR_ASSERT(!Success); return Error;}
190private:
191 union {value_t Value; error_t Error;};
192 bool Success;
193};
194
195//--------------------------------------------------------------------------------------------------------------------------------------------------------------
196// Low-level character classification.
197
198// Verse grammar character classification functions.
199constexpr bool IsSpace (char8 c) {return c==' ' || c=='\t';}
200constexpr bool IsNewLine (char8 c) {return c==0x0D || c==0x0A;}
201constexpr bool IsEnding (char8 c) {return c==0 || c==0x0D || c==0x0A;}
202constexpr bool IsAlpha (char8 c) {return (c>='A'&&c<='Z') || (c>='a'&&c<='z') || c=='_';} // Parentheses `()` required to make static analysis happy
203constexpr bool IsDigit (char8 c) {return c>='0' && c<='9';}
204constexpr bool IsAlnum (char8 c) {return IsAlpha(c) || IsDigit(c);}
205constexpr bool IsHex (char8 c) {return (c>='0'&&c<='9') || (c>='A'&&c<='F') || (c>='a'&&c<='f');}
206constexpr nat8 DigitValue(char8 c) {return (c>='0'&&c<='9')? c-'0': (c>='A'&&c<='F')? c-'A'+10: (c>='a'&&c<='f')? c-'a'+10: 0;}
// IsIdentifierQuotable: c0 is a printable ASCII code unit (0x20..0x7E) other than { } " ' \ and does not
// begin either comment delimiter ("<#" or "#>") together with the following code unit c1.
207constexpr bool IsIdentifierQuotable(char8 c0,char8 c1) {return nat8(c0)>=0x20 && nat8(c0)<=0x7E && c0!='{' && c0!='}' && c0!='"' && c0!='\'' && c0!='\\' && (c0!='<'||c1!='#') && (c0!='#'||c1!='>');}
// IsStringBackslashLiteral: c0 is one of the characters accepted after a backslash
// (r n t \ " ' < > # & ~ { }), except that '<' followed by '#' and '#' followed by '>' are rejected.
208constexpr bool IsStringBackslashLiteral(char8 c0,char8 c1) {return c0=='r' || c0=='n' || c0=='t' || c0=='\\' || c0=='"' || c0=='\'' || (c0=='<'&&c1!='#') || c0=='>' || (c0=='#'&&c1!='>') || c0=='&' || c0=='~' || c0=='{' || c0=='}';}
209
210// Convert valid UTF-8 sequence with valid length to its Unicode Code Point.
 // NOTE(review): the function's signature line (listing line 211) is absent from this extract; the body
 // reads a byte pointer `s` and a byte count `Count`.
 // Each case weights the lead byte and masks the continuation bytes to their low six bits; the final
 // mask (0x7FF / 0xFFFF / 0x1FFFFF) discards the lead byte's tag bits. Any other Count is fatal.
212 switch(Count) { // Extra `nat8` casts for when `char8` is signed in certain circumstances
213 case 1: return char32( nat8(s[0]) );
214 case 2: return char32((nat32(nat8(s[0]))*0x40 + nat32(nat8(s[1])&0x3F) ) & 0x7FF );
215 case 3: return char32((nat32(nat8(s[0]))*0x1000 + nat32(nat8(s[1])&0x3F)*0x40 + nat32(nat8(s[2])&0x3F) ) & 0xFFFF );
216 case 4: return char32((nat32(nat8(s[0]))*0x40000 + nat32(nat8(s[1])&0x3F)*0x1000 + nat32(nat8(s[2])&0x3F)*0x40 + nat32(nat8(s[3])&0x3F)) & 0x1FFFFF);
217 default: Err();
218 }
219}
220
221// Get length of internal lexical unit recognized for Place.
222// U8 := 0o80..0oBF
223// UTF8 := 0o00..0o7F
224// | 0oC2..0oDF U8
225// | !(0oE0 0o80..0o9F | 0oED 0oA0..0oBF) 0oE0..0oEF U8 U8
226// | !(0oF0 0o80..0o8F | 0oF4 0o90..0oBF) 0oF0..0oF4 U8 U8 U8
227// Printable := 0o09 | !("<#" | "#>" | 0o0..0o1F | 0o7F | 0oC2 0o80..0o9F | 0oE2 0o80 0oA8..0oA9 ) UTF8 | ..
228// Special := '\'|'{'|'}'|'#'|'<'|'>'|'&'|'~'
229// String := .. !('\'|'{'|'}'|'"') Text ..
230// Content := .. !Special Text ..
// Returns the byte length (1-4) of the lexical unit starting at s when it is accepted for Place,
// or 0 when it is not. Dispatches on the first byte; following bytes are examined left to right with
// short-circuiting `&&`, so a byte is only read after the bytes before it passed their checks.
231template<place Place> nat EncodedLength(const char8* s) {
232 switch(nat8(s[0])) {
 // ASCII control characters (tab excluded) and DEL: accepted only as raw UTF8.
233 case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06: case 0x07:
234 case 0x08: case 0x0A: case 0x0B: case 0x0C: case 0x0D: case 0x0E: case 0x0F:
235 case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17:
236 case 0x18: case 0x19: case 0x1A: case 0x1B: case 0x1C: case 0x1D: case 0x1E: case 0x1F:
237 case 0x7F:
238 return Place==place::UTF8? 1: 0;
 // Tab and blank are accepted in every place.
239 case 0x09: case 0x20:
240 return 1;
241 case '"':
242 return Place!=place::Space && Place!=place::String? 1: 0;
 // '<' and '#' are rejected when they start a comment delimiter ("<#" / "#>"), except as raw UTF8.
243 case '<':
244 return Place==place::UTF8 || (s[1]!='#' && Place!=place::Space && Place!=place::Content)? 1: 0;
245 case '#':
246 return Place==place::UTF8 || (s[1]!='>' && Place!=place::Space)? 1: 0;
 // NOTE(review): the listing's own numbering jumps from 247 to 249 here, so a line appears to be
 // missing from this extract. As the text stands, '\\', '{' and '}' fall through to the return below;
 // check against the upstream header.
247 case '\\': case '{': case '}':
249 case '>': case '&': case '~':
250 return Place!=place::Space && Place!=place::Content? 1: 0;
 // Remaining printable ASCII: accepted everywhere except in Space.
251 case '!': case '$': case '%': case '\'':case '(': case ')': case '*': case '+': case ',': case '-': case '.': case '/':
252 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
253 case ':': case ';': case '=': case '?': case '@':
254 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M':
255 case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
256 case '[': case ']': case '^': case '_': case '`':
257 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm':
258 case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
259 case '|':
260 return Place!=place::Space? 1: 0;
 // Bytes that can never start a UTF-8 sequence: continuation bytes, the overlong leads 0xC0/0xC1, and 0xF5..0xFF.
261 case 0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: case 0x87:
262 case 0x88: case 0x89: case 0x8A: case 0x8B: case 0x8C: case 0x8D: case 0x8E: case 0x8F:
263 case 0x90: case 0x91: case 0x92: case 0x93: case 0x94: case 0x95: case 0x96: case 0x97:
264 case 0x98: case 0x99: case 0x9A: case 0x9B: case 0x9C: case 0x9D: case 0x9E: case 0x9F:
265 case 0xA0: case 0xA1: case 0xA2: case 0xA3: case 0xA4: case 0xA5: case 0xA6: case 0xA7:
266 case 0xA8: case 0xA9: case 0xAA: case 0xAB: case 0xAC: case 0xAD: case 0xAE: case 0xAF:
267 case 0xB0: case 0xB1: case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: case 0xB7:
268 case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: case 0xBF:
269 case 0xC0: case 0xC1:
270 case 0xF5: case 0xF6: case 0xF7:
271 case 0xF8: case 0xF9: case 0xFA: case 0xFB: case 0xFC: case 0xFD: case 0xFE: case 0xFF:
272 return 0;
 // Two-byte sequences. After 0xC2 the second bytes 0x80..0x9F are accepted only as raw UTF8.
273 case 0xC2:
274 return Place!=place::Space && nat8(s[1])>=0x80&&nat8(s[1])<=0xBF && (Place==place::UTF8 || nat8(s[1])>=0xA0)? 2: 0;
275 case 0xC3: case 0xC4: case 0xC5: case 0xC6: case 0xC7:
276 case 0xC8: case 0xC9: case 0xCA: case 0xCB: case 0xCC: case 0xCD: case 0xCE: case 0xCF:
277 case 0xD0: case 0xD1: case 0xD2: case 0xD3: case 0xD4: case 0xD5: case 0xD6: case 0xD7:
278 case 0xD8: case 0xD9: case 0xDA: case 0xDB: case 0xDC: case 0xDD: case 0xDE: case 0xDF:
279 return Place!=place::Space && nat8(s[1])>=0x80&&nat8(s[1])<=0xBF? 2: 0;
 // Three-byte sequences. After 0xE0 the second byte must be 0xA0..0xBF.
280 case 0xE0:
281 return Place!=place::Space && nat8(s[1])>=0xA0&&nat8(s[1])<=0xBF && nat8(s[2])>=0x80&&nat8(s[2])<=0xBF? 3: 0;
 // 0xE2: the sequences 0xE2 0x80 0xA8 and 0xE2 0x80 0xA9 are rejected, for every Place.
282 case 0xE2:
283 if constexpr(Place!=place::Space)
284 if ((nat8(s[1])>=0x80)
285 && (nat8(s[1])<=0xBF)
286 && (nat8(s[2])>=0x80)
287 && (nat8(s[2])<=0xBF)
288 && ((nat8(s[1])!=0x80)
289 || ((nat8(s[2])!=0xA8)
290 && (nat8(s[2])!=0xA9))))
291 return 3;
292 return 0;
293 case 0xE1: case 0xE3: case 0xE4: case 0xE5: case 0xE6: case 0xE7:
294 case 0xE8: case 0xE9: case 0xEA: case 0xEB: case 0xEC: case 0xEE: case 0xEF:
295 return Place!=place::Space && nat8(s[1])>=0x80&&nat8(s[1])<=0xBF && nat8(s[2])>=0x80&&nat8(s[2])<=0xBF? 3: 0;
 // After 0xED the second byte must be 0x80..0x9F.
296 case 0xED:
297 return Place!=place::Space && nat8(s[1])>=0x80&&nat8(s[1])<=0x9F && nat8(s[2])>=0x80&&nat8(s[2])<=0xBF? 3: 0;
 // Four-byte sequences. After 0xF0 the second byte must be 0x90..0xBF; after 0xF4 it must be 0x80..0x8F.
298 case 0xF0:
299 return Place!=place::Space && nat8(s[1])>=0x90&&nat8(s[1])<=0xBF && nat8(s[2])>=0x80&&nat8(s[2])<=0xBF && nat8(s[3])>=0x80&&nat8(s[3])<=0xBF? 4: 0;
300 case 0xF1: case 0xF2: case 0xF3:
301 return Place!=place::Space && nat8(s[1])>=0x80&&nat8(s[1])<=0xBF && nat8(s[2])>=0x80&&nat8(s[2])<=0xBF && nat8(s[3])>=0x80&&nat8(s[3])<=0xBF? 4: 0;
302 case 0xF4:
303 return Place!=place::Space && nat8(s[1])>=0x80&&nat8(s[1])<=0x8F && nat8(s[2])>=0x80&&nat8(s[2])<=0xBF && nat8(s[3])>=0x80&&nat8(s[3])<=0xBF? 4: 0;
304 default:
305 return 0;
306 };
307}
308
309//--------------------------------------------------------------------------------------------------------------------------------------------------------------
310// Grammar output.
311// This is not ready for use, but will later provide a Verse and VSON output library and pretty-printing API.
312
313// Grammar output encoding.
323inline bool ParenthesizePrefix(const encoding& Encoding,prec StringPrec) {
324 return StringPrec<Encoding.Prec;
325}
326inline bool ParenthesizePostfix(const encoding& Encoding,prec StringPrec) {
327 return StringPrec<Encoding.Prec || ((Encoding.Prec==prec::Less)&&(StringPrec==prec::Greater));
328}
329
330//--------------------------------------------------------------------------------------------------------------------------------------------------------------
331// Token table.
332
333// Forward declared tokens.
334extern const struct token_set AllTokens,AllowLess,AllowNotEq;
335
336// Token information.
366constexpr token_info Tokens[]={
442};
443
444// Tokens.
// A token is an index into the Tokens table. Index 0 means "no token"; indices 1-4 are the special
// tokens End, NewLine, Alpha and Digit; table lookups by symbol start at FirstParse (5).
// NOTE(review): the declaration of the `Index` member (listing line 446) is absent from this extract.
445struct token {
447 explicit constexpr token(nat8 Index0): Index(Index0) {}
 // Looks up a token by its symbol: scans the table backwards from the last entry down to FirstParse
 // and stops at the first entry whose Symbol matches Op exactly, terminator included.
 // Leaves Index at 0 (None) when nothing matches.
448 constexpr token(const char8* Op): Index(nat8(ArraySize(Tokens)-1)) {
449 for(; Index>=nat(token::FirstParse()); Index--)
450 for(nat j=0;; j++)
451 if(Tokens[Index].Symbol[j]!=char8(Op[j]))
452 break;
453 else if(!Op[j])
454 return;
455 Index=0;
456 }
 // Implicit conversion to the table index.
457 constexpr operator nat8() const {return Index;}
458 static constexpr token None() {return token(nat8(0));}
459 static constexpr token End() {return token(1);}
460 static constexpr token NewLine() {return token(2);}
461 static constexpr token Alpha() {return token(3);}
462 static constexpr token Digit() {return token(4);}
463 static constexpr token FirstParse() {return token(5);}
 // True for anything other than None.
464 explicit operator bool() const {
465 return Index!=0;
466 }
 // Access to this token's entry in the Tokens table.
467 constexpr const token_info* operator->() const {
468 return &Tokens[Index];
469 }
470};
471
472// A set of tokens.
473struct token_set {
474 constexpr token_set(): Bits{0,0} {}
475 template<class... ts> explicit constexpr token_set(token T,ts... TS): token_set(TS...) {
476 Bits[nat8(T)/64]|=1LL<<(nat8(T)&63);
477 }
478 template<class... ts> explicit constexpr token_set(const char8* S,ts... TS): token_set(token(S),TS...) {}
479 constexpr bool Has(token T) const {
480 return Bits[nat8(T)/64]&(1LL<<(nat8(T)&63));
481 }
482 constexpr explicit operator bool() const {
483 return Bits[0] || Bits[1];
484 }
485 constexpr token_set operator&(const token_set& Other) const {
486 return token_set(Bits[0]&Other.Bits[0],Bits[1]&Other.Bits[1]);
487 }
488 constexpr token_set operator|(const token_set& Other) const {
489 return token_set(Bits[0]|Other.Bits[0],Bits[1]|Other.Bits[1]);
490 }
491 constexpr token_set operator~() const {
492 return token_set(~Bits[0],~Bits[1]);
493 }
494private:
495 constexpr token_set(nat64 Bits0,nat64 Bits1): Bits{Bits0,Bits1} {}
496 nat64 Bits[2];
497};
498
499//--------------------------------------------------------------------------------------------------------------------------------------------------------------
500// Token sets.
501
// Named token sets. The Allow* sets are complements (every token except the ones listed); the Stop*
// sets are cumulative, each extending the previous one with further tokens.
// NOTE(review): AllTokens is forward-declared earlier in the file, but its definition (listing line 502),
// like line 514, is absent from this extract.
503inline const token_set AllowLess = ~token_set{u8">",u8">="};
504inline const token_set AllowNotEq = ~token_set{u8">",u8">=",u8"<",u8"<="};
505inline const token_set InPrefixes = token_set{u8":",u8"in"};
506inline const token_set StopList = token_set{u8":)",u8")",u8"]",u8"}",token::NewLine(),token::End()};
507inline const token_set StopExpr = StopList | token_set{u8";",u8","};
508inline const token_set StopFun = StopExpr | token_set{u8"@"};
509inline const token_set StopDef = StopFun | token_set{u8"=>",u8"next",u8"over",u8"when",u8"while"};
510inline const token_set BracePostfixes = token_set{u8"{"};
511inline const token_set BlockPostfixes = token_set{u8"{",u8".",u8":"};
512inline const token_set ParenPostfixes = token_set{u8"("};
513inline const token_set WithPostfixes = token_set{u8"with",u8"<"};
515inline const token_set MarkupPostfixes = token_set{u8",",u8";",u8">",u8":>"};
516inline const token_set DefPostfixes = token_set{u8"=",u8":=",u8"+=",u8"-=",u8"*=",u8"/="};
517
518//--------------------------------------------------------------------------------------------------------------------------------------------------------------
519// Parser.
520
521// Generator-independent base class of parser.
523private:
524 template<class> friend struct parser;
525
526 // A cursor tracks a parsing position in accordance with the Verse grammar,
527 // and a snipping position that attributes NewLine to the Space preceding it.
 // Plain-data parse position. Copying a cursor and assigning it back later is how this file backtracks.
528 struct cursor {
529 const char8* Pos; // Pointer to current parse position.
530 const char8* LineStart; // Pointer to start of line.
531 const char8* NextLineStart; // If >Pos, indicates we've snipped the NewLine at Pos.
532 token Token; // Token here.
533 nat TokenSize; // Length of token.
534 nat Line; // Zero-based line number.
 // Code unit at a signed offset from Pos; no bounds check is performed.
535 char8 operator[](int64 Offset) const {
536 return Pos[Offset];
537 }
 // True when NextLineStart lies beyond Pos.
538 bool SnippedNewLine() const {
539 return NextLineStart>Pos;
540 }
541 };
542
543 // A point for producing snippets.
 // A position (pointer, line, column) used as one end of a snippet.
544 struct point {
545 const char8* Pos;
546 nat Line,Column;
547 point(const char8* Pos0,nat Line0,nat Column0): Pos(Pos0), Line(Line0), Column(Column0) {}
 // From a cursor. If the cursor has snipped the newline at Pos, the point is the start of the next
 // line (Line+1, column 1); otherwise it is the cursor position with a 1-based column measured from LineStart.
548 point(const cursor& Cursor):
549 Pos (Cursor.SnippedNewLine()? Cursor.NextLineStart: Cursor.Pos),
550 Line (Cursor.SnippedNewLine()? Cursor.Line+1: Cursor.Line),
551 Column(Cursor.SnippedNewLine()? 1: nat(Cursor.Pos-Cursor.LineStart+1)) {}
 // The start and stop ends of an existing snippet.
552 static point Start(const snippet& Snippet) {return point{Snippet.Text.Start,Snippet.StartLine,Snippet.StartColumn};}
553 static point Stop (const snippet& Snippet) {return point{Snippet.Text.Stop,Snippet.StopLine,Snippet.StopColumn};}
554 };
555
556 // Grammar context coinciding with "push" and "pop" in the specification.
 // Indentation and line-prefix state of the grammar.
557 struct context {
558 const char8* BlockInd; // Start of the line that initiated our current indentation, or nullptr.
559 const char8* TrimInd; // BlockInd or a more indented block to specify further text trimming.
560 bool Nest; // Whether we accept lines with equal indentation to BlockInd.
561 bool LinePrefix; // Whether subsequent ScanKey and Commas lines should be prefixed with '&'.
 // Initial context: both indentation pointers refer to an empty string literal (not nullptr),
 // and Nest and LinePrefix start out true.
562 context(): BlockInd{u8""}, TrimInd{u8""}, Nest(true), LinePrefix(true) {}
563 };
564
565 // Tokens.
566 nat8 FirstToken [256 ]; // First candidate token per leading char8.
567 nat8 NextToken [ArraySize(Tokens)]; // Next candidate token per token.
568 token ParseToken(const char8* Start,nat& Size) {
569 if(Start[0]==0)
570 return Size=0, token::End();
571 for(nat8 i=FirstToken[nat8(Start[0])]; i; i=NextToken[i]) {
572 if(i<token::FirstParse())
573 return Size=0, token(i);
574 auto Symbol = Tokens[i].Symbol;
575 nat j;
576 for(j=0; Symbol[j] && Start[j]==Symbol[j]; j++);
577 if(Symbol[j] || (IsAlnum(Symbol[0])&&IsAlnum(Start[j])))
578 continue;
579 return Size=j, token(i);
580 }
581 return Size=0, token::None();
582 }
583
584 // State and constructor.
585 cursor Cursor;
586 context Context;
587 nat32 ExprDepth{0};
588 nat32 CommentDepth{0};
589 const nat InputLength;
590 const char8* InputString;
 // Builds the token lookup tables for an input of InputLength0 code units that must be followed by a NUL.
 // NOTE(review): listing lines 593 and 598 are absent from this extract, so part of the initializer list
 // and the first alternative of the FirstToken expression are not visible here; Line0 is unused in the
 // visible text.
591 parser_base(nat InputLength0,const char8* InputString0,nat Line0=1):
592 FirstToken{}, NextToken{},
594 InputLength(InputLength0), InputString(InputString0) {
595 GRAMMAR_ASSERT(InputString[InputLength]==0);
 // ASCII bytes get a character-class token as their default first candidate; bytes 128..255 stay 0.
596 for(nat c=0u; c<128u; c++)
597 FirstToken[c] =
599 IsEnding(char8(c))? token::End():
600 IsAlpha(char8(c))? token::Alpha():
601 IsDigit(char8(c))? token::Digit():
602 token::None();
 // Push every symbol token onto the front of the chain for its first byte, so tokens later in the
 // table are tried before earlier ones and the character-class default comes last.
603 for(auto Token=nat8(token::FirstParse()); Token<ArraySize(Tokens); Token++) {
604 auto& First=FirstToken[nat(Tokens[Token].Symbol[0])];
605 if(First)
606 NextToken[Token]=First;
607 First=token(Token);
608 }
609 }
610
611 // Consumption.
612 void Next(nat n) {
613 while(n--)
614 GRAMMAR_ASSERT(Cursor[0]!=0), Cursor.Pos++;
615 }
616 bool Eat(const char8* s) {
617 nat n;
618 for(n=0; s[n]; n++)
619 if(Cursor[n]!=s[n])
620 return false;
621 return Cursor.Pos+=n, true;
622 }
623 void EatToken() {
624 Cursor.Pos += Cursor.TokenSize;
625 }
626
627 // Snippets.
 // Builds the snippet spanning from Start to Stop (text range, lines and columns).
628 static snippet Snip(const point& Start,const point& Stop) {
629 return snippet{
630 Start.Pos, Stop.Pos,
631 Start.Line, Stop.Line,
632 Start.Column, Stop.Column
633 };
634 }
 // Snippet from Start up to the current cursor.
635 snippet Snip(const point& Start) const {
636 return Snip(Start,Cursor);
637 }
 // Empty snippet located at the current cursor.
638 snippet Snip() const {
639 return Snip(Cursor,Cursor);
640 }
641 text CursorQuote() {
642 static const text Quote[2]={u8"",u8"\""};
643 const nat8 Cur0 = nat8(Cursor[0]);
644 return Quote[Cur0>0x20 && Cur0!='"' && Cur0<0x7F];
645 }
646 text CursorText() {
647 const nat8 Cur0 = nat8(Cursor[0]);
648
649 // Quoted.
650 if((Cur0=='#'&&Cursor[1]=='>') || (Cur0=='<'&&Cursor[1]=='#'))
651 return text(Cursor.Pos,Cursor.Pos+2);
652 if(IsAlpha(Cur0)) {
653 nat n=1;
654 while(IsAlnum(Cursor[n]))
655 n++;
656 return text(Cursor.Pos,Cursor.Pos+n);
657 }
658 if(Cur0>0x20 && Cur0<=0x7E)
659 return text(Cursor.Pos,Cursor.Pos+1);
660
661 // Not quoted.
662 if(Cur0=='"')
663 return u8"'\"'";
664 else if(Cur0>=128 && EncodedLength<place::Printable>(Cursor.Pos))
665 return u8"unicode character";
666 else if(Cur0>=128)
667 return u8"non-unicode character sequence";
668 else if(Cur0=='\r' || Cur0=='\n')
669 return u8"end of line";
670 else if(Cur0=='\t')
671 return u8"tab";
672 else if(Cur0==' ')
673 return u8"space";
674 else if(Cur0==0)
675 return u8"end of file";
676 else
677 return u8"ASCII control character";
678 }
679};
680
681// Generator-dependent parser.
682template<class gen_t> struct parser: parser_base {
683private:
684 using syntax_t = typename gen_t::syntax_t;
685 using syntaxes_t = typename gen_t::syntaxes_t;
686 using error_t = typename gen_t::error_t;
687 using capture_t = typename gen_t::capture_t;
688 template<class t> using result_t = result<t,error_t>;
689
690 // Constructor.
691 const gen_t& Gen;
692 parser(const gen_t& Gen0,nat n,const char8* Source0,nat StartLine=1):
693 // Accounts for null `Source0` which often occurs with empty files / etc.
694 parser_base(n,Source0?Source0:u8"", StartLine), Gen(Gen0) {}
695
696 // Tracking trailing captures across expressions and their postfixes so we can
697 // assign them to the lexically outermost generator.
698 struct trailing {
699 result<cursor,nothing> TrailingStart;
700 capture_t TrailingCapture;
701 explicit operator bool() const {
702 return bool(TrailingStart);
703 }
704 void MoveFrom(trailing& Source) {
705 GRAMMAR_ASSERT(!TrailingStart);
706 TrailingStart = Source.TrailingStart;
707 TrailingCapture = Source.TrailingCapture;
708 Source.TrailingStart = nothing{};
709 }
710 };
711
712 // Our extended internal block structure tracking block's trailing captures.
 // The public block description plus the parser-internal trailing capture that follows the block.
713 struct block_t: public block<syntaxes_t,capture_t> {
 // Inherit the base type's constructors.
714 using block<syntaxes_t,capture_t>::block;
715 trailing BlockTrailing;
716 };
717
718 // We track a stack of expressions and postfixes at increasing precedence so that we can
719 // insert multi-precedence postfix operators like '<' and stop subsequent parsing there.
720 // An expr is in one of three states (except mid-update when these invariants don't hold):
721 // - Uninitialized: no ExprSyntax, no Trailing, not Finished.
722 // - Initialized: has ExprSyntax, has Trailing, not Finished.
723 // - Finished: has ExprSyntax, no Trailing, is Finished.
 // One entry of the expression stack; entries are linked to their enclosing entry through OuterExpr.
724 struct expr {
725 cursor Start;
726 prec FinishPrec;
727 result<cursor,nothing> Finished; // Holds a cursor once the expression is finished.
728 expr* OuterExpr;
729 token_set AllowPostfixes;
730 result<syntax_t,nothing> ExprSyntax;
731 capture_t ExprLeading;
732 trailing Trailing; //-V730_NOINIT
733 result<cursor,nothing> MarkupStart;
734 bool MarkupFinished, ExprStop;
735 struct expr* OuterMarkup;
736 text MarkupTag;
737 expr* QualIdentTarget;
 // Members not named in the initializer list (Finished, ExprSyntax, ExprLeading, Trailing, MarkupStart,
 // MarkupTag) are default-initialized, which is the "Uninitialized" state for the result members.
738 expr(prec FinishPrec0,const cursor& Start0,expr* OuterExpr0,token_set AllowPostfixes0=token_set{},expr* QualIdentTarget0=nullptr):
739 Start(Start0), FinishPrec(FinishPrec0),
740 OuterExpr(OuterExpr0), AllowPostfixes(AllowPostfixes0),
741 MarkupFinished(false), ExprStop(false), OuterMarkup(nullptr),
742 QualIdentTarget(QualIdentTarget0) {} //-V730
 // Returns a copy of the expression's syntax; the underlying result asserts that one is present.
743 syntax_t operator*() const {
744 return *ExprSyntax;
745 }
746 // Needs a virtual destructor since it has virtual methods or various static analysis will complain
747 virtual ~expr() {}
 // Default finish hook: requires a pending trailing capture and an unfinished outer expression (if any),
 // then records the cursor where that trailing capture started as the finish position.
 // NOTE(review): listing line 749 is absent from this extract.
748 virtual result_t<nothing> OnFinish(parser& /*Parser*/) {
750 GRAMMAR_ASSERT(!OuterExpr || !OuterExpr->Finished);
751 GRAMMAR_ASSERT(Trailing);
752 Finished = *Trailing.TrailingStart;
753 return nothing{};
754 }
755 };
756
757 // Token management.
 // Recomputes Cursor.Token / Cursor.TokenSize for the current position.
 // If the token's symbol starts with a letter or underscore, the function looks ahead past the token and
 // any following space, then restores the cursor; when that lookahead succeeded and ended on ":=", the
 // token is replaced by Alpha.
 // NOTE(review): Space() is defined outside this extract; the test on Cursor.Token after the call
 // presumably relies on Space() refreshing the token - confirm.
758 void UpdateToken() {
759 Cursor.Token=ParseToken(Cursor.Pos,Cursor.TokenSize);
760 if(IsAlpha(Cursor.Token->Symbol[0])) {
761 // Key := !Alnum Space !":="
762 // When a reserved word is followed by a definition symbol, we demote it to an identifier
763 // so that simple object notation supports all identifiers including reserved words.
764 cursor KeyStart = Cursor;
765 EatToken();
766 auto SpaceResult = Space();
767 auto IsIdentifier = Cursor.Token==token(u8":=");
768 Cursor = KeyStart;//backtrack but could cache
769 if(SpaceResult && IsIdentifier)
770 Cursor.Token=token::Alpha();
771 }
772 }
773 bool CheckToken() {
774 auto SavedToken=Cursor.Token;
775 UpdateToken();
776 return Cursor.Token==SavedToken;
777 }
778
779 // Errors.
780 result_t<nothing> Require(const char8* Value,error_t(parser::*OnError)(text What)) {
781 if(!Eat(Value))
782 return (this->*OnError)(Value);
783 return nothing{};
784 }
785 result_t<nothing> RequireClose(cursor Start,const char8* Open,const char8* Close,error_t(parser::*OnError)(text)) {
786 if(Eat(Close))
787 return nothing{};
788 else if(!Ending())
789 return (this->*OnError)(Close);
790 else
791 return Cursor=Start, S80(Open);
792 }
793
794 // Snippets.
 // Snippet from Start to the position where the given expression finished; End must be finished.
795 snippet SnipFinished(const cursor& Start,const expr& End) {
796 return Snip(Start,*End.Finished);
797 }
 // Snippet from Start to the position where the block's trailing capture began; the block must hold one.
798 snippet SnipFinished(const cursor& Start,const block_t& End) {
799 return Snip(Start,*End.BlockTrailing.TrailingStart);
800 }
801
802 // Trailing capture and snippet management.
 // Starts a trailing capture at the current cursor: records the cursor, then runs Space() into the
 // capture. Trailing must be empty on entry; an error from Space() is returned to the caller.
803 result_t<nothing> SpaceTrailing(trailing& Trailing) {
804 GRAMMAR_ASSERT(!Trailing);
805 Trailing.TrailingStart=Cursor;
806 GRAMMAR_RUN(Space(Trailing.TrailingCapture));
807 return nothing{};
808 }
 // Gives Target the new syntax from SyntaxResult together with the trailing capture held by Source.
 // The capture is moved first; if SyntaxResult is an error it is returned after that move has happened.
809 result_t<nothing> UpdateFrom(expr& Target,trailing& Source,const result_t<syntax_t>& SyntaxResult) {
810 GRAMMAR_ASSERT(Source);
811 GRAMMAR_ASSERT(!Target.Finished && !Target.Trailing);
812 Target.Trailing.MoveFrom(Source);
813 GRAMMAR_SET(Target.ExprSyntax,SyntaxResult);
814 return nothing{};
815 }
 // Gives Target the new syntax from SyntaxResult and then captures the space that follows as its
 // trailing capture. An error in SyntaxResult, or from the space scan, is returned to the caller.
816 result_t<nothing> UpdateSpaceTrailing(expr& Target,const result_t<syntax_t>& SyntaxResult) {
817 GRAMMAR_ASSERT(!Target.Finished);
818 GRAMMAR_ASSERT(!Target.Trailing);
819 GRAMMAR_SET(Target.ExprSyntax,SyntaxResult);
820 GRAMMAR_RUN(SpaceTrailing(Target.Trailing));
821 return nothing{};
822 }
 // Folds Target's pending trailing capture into its syntax via Gen.Trailing, clears the pending capture,
 // and returns the updated syntax. Target must hold a trailing capture, and must be unfinished unless
 // FinishingNow is set.
823 syntax_t ApplyTrailing(expr& Target,bool FinishingNow=false) {
824 GRAMMAR_ASSERT(!Target.Finished || FinishingNow);
825 GRAMMAR_ASSERT(Target.Trailing);
826 Target.ExprSyntax = Gen.Trailing(*Target,Target.Trailing.TrailingCapture);
827 Target.Trailing = trailing{};
828 return *Target;
829 }
 // Folds the block's pending trailing capture into the block: it is appended to PunctuationTrailing when
 // the block has punctuation and to ElementsTrailing otherwise. The block snippet is then extended to
 // TrailingEnd and the pending capture cleared.
830 void ApplyTrailing(block_t& Block0,const point& TrailingEnd) {
831 if(Block0.Punctuation!=punctuation::None)
832 Gen.CaptureAppend(Block0.PunctuationTrailing,Block0.BlockTrailing.TrailingCapture);
833 else
834 Gen.CaptureAppend(Block0.ElementsTrailing,Block0.BlockTrailing.TrailingCapture);
835 Block0.BlockSnippet = Snip(point::Start(Block0.BlockSnippet),TrailingEnd);
836 Block0.BlockTrailing = trailing{};
837 }
838
839 // Character set and comment and errors:
 // Each Snn helper builds an error through Gen.Err from an empty snippet at the current cursor, the
 // error code, and the message fragments; CursorQuote()/CursorText() describe what is at the cursor.
840 auto S01() {return Gen.Err(Snip(),"S01","Source must be ASCII or Unicode UTF-8 format");}
841 auto S02() {return Gen.Err(Snip(),"S02","Unexpected ",CursorQuote(),CursorText(),CursorQuote()," in block comment");}
842 auto S03() {return Gen.Err(Snip(),"S03","Unexpected ",CursorQuote(),CursorText(),CursorQuote()," in line comment");}
843 auto S04() {return Gen.Err(Snip(),"S04","Block comment beginning at \"<#\" never ends");}
844 auto S05() {return Gen.Err(Snip(),"S05","Ending \"#>\" is outside of block comment");}
845 auto S06() {return Gen.Err(Snip(),"S06","Unexpected ",CursorQuote(),CursorText(),CursorQuote()," in indented comment");}
846
847 // Numeric and numbered character constant errors.
848 auto S15() {return Gen.Err(Snip(),"S15","Unexpected ",CursorQuote(),CursorText(),CursorQuote()," following number.");}
849 auto S16() {return Gen.Err(Snip(),"S15","Unexpected ",CursorQuote(),CursorText(),CursorQuote()," following character.");}
850 auto S18() {return Gen.Err(Snip(),"S18","Character code unit octet must be 1-2 digits in the range 0o0 to 0oFF");}
851 auto S19() {return Gen.Err(Snip(),"S19","Unicode code point must be 1-6 digits in the range 0u0 to 0u10FFFF");}
852
853 // Identifier errors.
 // Identifier, qualifier and path errors. `What` is the text that was expected or that preceded the
 // problem, and is inserted into the message verbatim.
854 auto S20(text What) {return Gen.Err(Snip(),"S20","Unexpected ",CursorQuote(),CursorText(),CursorQuote()," or missing identifier following \"",What,"\"");}
855 auto S23(text What) {return Gen.Err(Snip(),"S23","Unexpected ",CursorQuote(),CursorText(),CursorQuote()," or missing \"",What,"\" in qualifier");}
856 auto S24(text What) {return Gen.Err(Snip(),"S24","Unexpected ",CursorQuote(),CursorText(),CursorQuote()," or missing \"",What,"\" in quoted identifier");}
857 auto S25(text What) {return Gen.Err(Snip(),"S25","Unexpected ",CursorQuote(),CursorText(),CursorQuote()," or missing \"",What,"\" in path literal");}
858 auto S26(text What) {return Gen.Err(Snip(),"S26","Missing label in path following \"",What,"\"");}
859
860 // Text errors.
 // Character and string literal errors. S31 and S32 take a `text` argument that they ignore.
 // NOTE(review): presumably the unused parameter exists so they fit the `error_t(parser::*)(text)`
 // callback shape taken by Require/RequireClose - confirm at the call sites.
861 auto S30() {return Gen.Err(Snip(),"S30","Unexpected ",CursorQuote(),CursorText(),CursorQuote()," in character literal");}
862 auto S31(text) {return Gen.Err(Snip(),"S31","Missing \"'\" in character literal");}
863 auto S32(text) {return Gen.Err(Snip(),"S32","Unexpected ",CursorQuote(),CursorText(),CursorQuote()," or missing end quote in string literal");}
864 auto S34() {return Gen.Err(Snip(),"S34","Bad character escape \"\\\" followed by ",CursorQuote(),CursorText(),CursorQuote());}
865
866 // Markup errors.
 // Markup tag errors. S43 reports a start tag `Tag` closed by a differently named end tag `Id`.
867 auto S40() {return Gen.Err(Snip(),"S40","Missing markup tag preceding ",CursorQuote(),CursorText(),CursorQuote());}
868 auto S41() {return Gen.Err(Snip(),"S41","Bad markup expression preceding ",CursorQuote(),CursorText(),CursorQuote());}
869 auto S42() {return Gen.Err(Snip(),"S42","Unexpected markup end tag outside of markup");}
870 auto S43(text Tag,text Id) {return Gen.Err(Snip(),"S43","Markup started with \"<",Tag,">\" tag but ended in mismatched \"</",Id,">\" tag");}
871 auto S44(text What) {return Gen.Err(Snip(),"S44","Unexpected ",CursorQuote(),CursorText(),CursorQuote()," or missing \"",What,"\" in markup end tag");}
872 auto S46() {return Gen.Err(Snip(),"S46","Expected indented markup following \":>\" but got ",CursorQuote(),CursorText(),CursorQuote());}
873
874 // Markup content errors.
// S51 quotes What as the text that was expected; S52 takes an unnamed text argument that the message ignores.
875 auto S51(text What) {return Gen.Err(Snip(),"S51","Unexpected ",CursorQuote(),CursorText(),CursorQuote()," or missing \"",What,"\" in markup");}
876 auto S52(text) {return Gen.Err(Snip(),"S52","Unexpected ",CursorQuote(),CursorText(),CursorQuote()," or missing markup end tag");}
877 auto S54() {return Gen.Err(Snip(),"S54","Unexpected ",CursorQuote(),CursorText(),CursorQuote()," in indented markup");}
878 auto S57() {return Gen.Err(Snip(),"S57","Unexpected ",CursorQuote(),CursorText(),CursorQuote()," or missing ending \";\" or newline following \"&\" markup escape expression");}
// S58 is a fixed-text error; Contents returns it when a '~' follows content that did not begin with '~'.
879 auto S58() {return Gen.Err(Snip(),"S58","Markup list separator \"~\" is only allowed in markup beginning with \"~\"; elsewhere escape it using \"\\~\"");}
880
881 // Precedence errors.
// Op is the operator text quoted as not allowed; in S60, What is the text that Op followed.
882 auto S60(text What,text Op) {return Gen.Err(Snip(),"S60","Precedence doesn't allow \"",Op,"\" following \"",What,"\"");}
883 auto S61(text Op) {return Gen.Err(Snip(),"S61","Precedence doesn't allow \"",Op,"\" here");}
// S62: fixed-text error pointing users of '&&', '||' or '!' at the Verse keywords 'and', 'or', 'not'.
884 auto S62() {return Gen.Err(Snip(),"S62","Verse uses 'and', 'or', 'not' instead of '&&', '||', '!'.");}
// S64 takes an unnamed first text argument that the message ignores; only Op is reported.
885 auto S64(text,text Op) {return Gen.Err(Snip(),"S64","Precedence doesn't allow \"",Op,"\" in markup tag expression");}
// S65, S66 and S68 name the Verse form to use instead; S67 states what must follow a prefix attribute.
886 auto S65() {return Gen.Err(Snip(),"S65","Use a=b for comparison, not a==b");}
887 auto S66(text Op) {return Gen.Err(Snip(),"S66","Use 'set' before \"",Op,"\" to update variables");}
888 auto S67() {return Gen.Err(Snip(),"S67","Prefix attribute must be followed by identifier declaration");}
889 auto S68() {return Gen.Err(Snip(),"S68","Use # for line comment, not //");}
890
891 // Bad or missing expression, block, keyword errors.
// S70 and S74 take an unnamed text argument that the message ignores; S71 and S76 quote What as the text the expression or block should have followed.
892 auto S70(text) {return Gen.Err(Snip(),"S70","Expected expression, got ",CursorQuote(),CursorText(),CursorQuote()," at top level of program");}
893 auto S71(text What) {return Gen.Err(Snip(),"S71","Expected expression, got ",CursorQuote(),CursorText(),CursorQuote()," following \"",What,"\"");}
894 auto S74(text) {return Gen.Err(Snip(),"S74","Expected markup tag expression, got ",CursorQuote(),CursorText(),CursorQuote());}
895 auto S76(text What) {return Gen.Err(Snip(),"S76","Expected block, got ",CursorQuote(),CursorText(),CursorQuote()," following \"",What,"\"");}
896 auto S77() {return Gen.Err(Snip(),"S77","Unexpected ",CursorQuote(),CursorText(),CursorQuote()," following expression");}
897 auto S78() {return Gen.Err(Snip(),"S78","Expected <specifier> following \"with\"");}
// S79: unexpected text, or a missing ">", after a specifier.
// The fragment after the quoted cursor text begins with a space, matching the sibling "Unexpected ... or missing ..." messages.
898 auto S79() {return Gen.Err(Snip(),"S79","Unexpected ",CursorQuote(),CursorText(),CursorQuote()," or missing \">\" following specifier");}
899
900 // Expression grouping errors:
// S80 quotes What as the text the unterminated block started with; S81-S86 quote What as the text that was expected.
901 auto S80(text What) {return Gen.Err(Snip(),"S80","Block starting in \"",What,"\" never ends");}
902 auto S81(text What) {return Gen.Err(Snip(),"S81","Expected expression or \"",What,"\", got ",CursorQuote(),CursorText(),CursorQuote()," in parenthesis");}
903 auto S82(text What) {return Gen.Err(Snip(),"S82","Expected expression or \"",What,"\", got ",CursorQuote(),CursorText(),CursorQuote()," in parenthesized parameter list");}
904 auto S83(text What) {return Gen.Err(Snip(),"S83","Expected expression or \"",What,"\", got ",CursorQuote(),CursorText(),CursorQuote()," in bracketed parameters");}
905 auto S84(text What) {return Gen.Err(Snip(),"S84","Expected expression or \"",What,"\", got ",CursorQuote(),CursorText(),CursorQuote()," in braced block");}
906 auto S85(text What) {return Gen.Err(Snip(),"S85","Expected \"",What,"\", got ",CursorQuote(),CursorText(),CursorQuote()," in prefix brackets");}
907 auto S86(text What) {return Gen.Err(Snip(),"S86","Expected expression or \"",What,"\", got ",CursorQuote(),CursorText(),CursorQuote()," in string interpolation");}
// S88 and S88void emit the same "S88" error. S88 takes an unnamed text argument (the form passed to List in IndList);
// S88void takes none, matching the error_t(parser::*)() callback type that Ded accepts.
908 auto S88(text) {return Gen.Err(Snip(),"S88","Expected expression, got ",CursorQuote(),CursorText(),CursorQuote()," in indented block");}
909 auto S88void() {return Gen.Err(Snip(),"S88","Expected expression, got ",CursorQuote(),CursorText(),CursorQuote()," in indented block");}
// S89 reports "space" or "tab" as the expected character by inspecting Context.BlockInd at the cursor's column.
910 auto S89() {return Gen.Err(Snip(),"S89","Indentation mismatch: expected ",Context.BlockInd[point(Cursor).Column]==' '? "space": "tab",", got ",CursorQuote(),CursorText(),CursorQuote());}
911
912 // Parser limitations versus spec.
// Fixed-text errors; none of them quotes the text at the cursor.
913 auto S97() {return Gen.Err(Snip(),"S97","Unexpected error");}
914 auto S98() {return Gen.Err(Snip(),"S98","Feature is not currently supported");}
915 auto S99() {return Gen.Err(Snip(),"S99","Exceeded maximum expression depth"); }
916
917 // Blank space and indentation.
// Records an upcoming newline in Capture without advancing Cursor.Pos.
// Does nothing when Cursor.SnippedNewLine() is already true or the cursor is not on CR (0x0D) or LF (0x0A).
// Capture: receives the newline through Gen.NewLine. Place: forwarded to Gen.NewLine (defaults to place::Space).
918 void SnipNewLine(capture_t& Capture,place Place=place::Space) {
919 // If a NewLine is ahead, incorporate it in Capture despite not consuming it per grammar spec.
920 if(!Cursor.SnippedNewLine() && (Cursor[0]==0x0D || Cursor[0]==0x0A)) {
921 auto Start = Cursor;
// The next line starts one character past a lone CR or LF, or two past a CR LF pair.
922 Cursor.NextLineStart = Cursor.Pos+1+(Cursor[0]==0x0D && Cursor[1]==0x0A);
923 Gen.NewLine(Capture,Snip(Start),Place);
924 }
925 }
// Consumes a newline at the cursor, if there is one.
// Calls SnipNewLine first; if Cursor.SnippedNewLine() then holds, moves Cursor.Pos and Cursor.LineStart to
// Cursor.NextLineStart, increments Cursor.Line and returns true. Otherwise returns false without moving.
926 bool NewLine(capture_t& Capture,place Place=place::Space) {
927 // NewLine := 0o0D [0o0A] | 0o0A
928 SnipNewLine(Capture,Place);
929 if(Cursor.SnippedNewLine()) {
930 Cursor.Pos = Cursor.NextLineStart;
931 Cursor.LineStart = Cursor.Pos;
932 Cursor.Line++;
933 return true;
934 }
935 return false;
936 }
// Lookahead test that consumes nothing: true when a newline has already been snipped
// or IsEnding accepts the character at the cursor.
937 bool Ending() {
938 // Ending := &(NewLine | end)
939 return Cursor.SnippedNewLine() || IsEnding(Cursor[0]);
940 }
942 // Space := {0o09 | 0o20 | Comment}
944 if(DoSnipNewLine)
945 SnipNewLine(Capture,Place);
946 return UpdateToken(), nothing{};
947 }
949 capture_t Capture;
951 return Capture;
952 }
// Enters an indented block.
// Returns a copy of Context taken before any change (callers such as IndList later hand a saved context to Ded),
// then sets Context.BlockInd and Context.TrimInd to Cursor.LineStart and clears Context.Nest.
// Precondition (checked with GRAMMAR_ASSERT): the cursor is at an Ending.
953 result_t<context> Ind() {
954 // Ind := Ending push; set Nest=false; set BlockInd=LineInd; set LinePrefix=""
955 GRAMMAR_ASSERT(Ending());
956 auto SavedContext = Context;
957 Context.BlockInd = Cursor.LineStart;
958 Context.TrimInd = Cursor.LineStart;
959 Context.Nest = false;
960 return SavedContext;
961 }
962 result_t<nothing> Ded(const context& SavedContext,error_t(parser::*OnError)()) {
963 // Ded := Ending pop
965 if(!Ending())
966 return (this->*OnError)();
967 return UpdateToken(), nothing{};
968 }
970 // Line := NewLine; parse i:={0o09|0o20}; (Ending | !(0o09|0o20) Space
971 // if (i>BlockInd | Nest and i=BlockInd) then set LineInd=ThisInd
972 // else if(not i<=BlockInd ) then error)
973 auto SavedLineEnd = Cursor;
974 if(!NewLine(Capture,Place))
975 return false;
976 auto SavedLineStart = Cursor;
977 while(IsSpace(Cursor[0]) && Cursor[0]==Context.BlockInd[Cursor.Pos-SavedLineStart.Pos])
978 Next(1);
979 bool HasMoreSpace = IsSpace(Cursor[0]);
980 if((HasMoreSpace || Context.Nest) && !IsSpace(Context.BlockInd[Cursor.Pos-SavedLineStart.Pos])) {
981 // This line falls into the current indented block, so consume any additional optional TrimInd
982 // and note via Gen.Indent followed by potentially Place-significant Space.
983 while(IsSpace(Cursor[0]) && Cursor[0]==Context.TrimInd[Cursor.Pos-SavedLineStart.Pos])
984 Next(1);
985 Gen.Indent(Capture,Snip(SavedLineStart),Place);
987 return true;
988 }
989 else if(Ending()) {
990 // Blank line whose indentation isn't related to leading.
991 return Gen.BlankLine(Capture,Snip(SavedLineStart),Place), true;
992 }
993 else if(HasMoreSpace) {
994 // Inconsistently indented nonblank line, so error at inconsistency.
995 return S89();
996 }
997 else {
998#if TIM
999 return Cursor=SavedLineEnd, false; // Backtrack but could cache.
1000#else
1001 // NOTE: (yiliang.siew) For indented indcmts, such as:
1002 /*
1003 *
1004 * ```
1005 * a<#>
1006 * b<#>
1007 * c<#>
1008 * d<#>
1009 * <#>
1010 * e<#>
1011 * ```
1012 *
1013 * And so on, the parser will end up treating the entirety of the contents after `a<#>` as an indcmt,
1014 * and recursively do so for the contents after `b<#>` and so on. This can lead to really slow parsing/stack overflow
1015 * should a malicious actor craft Verse syntax to take advantage of this.
1016 * We therefore only capture indented comments up to a certain point and give up; it's highly unlikely anyone would need
1017 * that amount of indentation in their comments.
1018 */
1019 const uint32_t NewCommentDepth = Place == place::IndCmt || Place == place::BlockCmt ? CommentDepth + 1 : CommentDepth;
1021 if (CommentDepth > VERSE_MAX_INDCMT_DEPTH) {
1022 return Cursor=SavedLineEnd, false; //backtrack but could cache
1023 }
1024
1025 // Line that only contains whitespace or comments, but less indented than the current block.
1026 // If we have reached this point, it means that we might be at the start of a comment on the next line, but
1027 // the comment might not have any indentation at all.
1028 capture_t SpaceCapture = {};
1030 if(Cursor.SnippedNewLine()) {
1031 // We need to keep eating until we hit a token that is non-whitespace/comment and has different indentation. If so, backtrack.
1032 // If no backtracking, return `true` and append the capture here.
1033 // If this `Scan` fails to extend the current block because there is already a non-whitespace/comment token there,
1034 // it will already have backtracked to the previous line. But the capture would still be empty since `Scan` would
1035 // return a `nothing{}` value.
1036 capture_t ScanCapture = {};
1037 if (Scan(ScanCapture,Place) && Gen.CaptureLength(ScanCapture) == 0) {
1038 // NOTE: (yiliang.siew) Backtrack here so that the comment can be associated with the correct capture.
1039 return Cursor=SavedLineEnd, false;
1040 }
1041 else {
1042 Gen.CaptureAppend(Capture, SpaceCapture);
1043 return true;
1044 }
1045 }
1046 else {
1047 // Consistent nonblank line from an earlier indented block.
1048 return Cursor=SavedLineEnd, false;//backtrack but could cache
1049 }
1050#endif
1051 }
1052 }
1054 // Scan := Space {Line}
1056 for(;;) {
1057 capture_t LineCapture;
1059 if(!GotLine)
1060 return UpdateToken(), nothing{};
1061
1062 // In place::Content, trim trailing [NewLine Space &('~' | '</')].
1063 if(Place==place::Content && (Cursor[0]=='~' || (Cursor[0]=='<'&&Cursor[1]=='/')))
1064 Gen.MarkupTrim(LineCapture);
1065 Gen.CaptureAppend(Capture,LineCapture);
1066 }
1067 }
1068 result_t<token> ScanKey(capture_t& Capture,token_set TokenSet) {
1069 // This function implements the grammar for Brace and ScanKey [Token1 | Token2 | ..] &Key.
1070 // Brace := Scan '{' List '}' Space
1071 // ScanKey := Space (&NewLine Scan LinePrefix Space | !NewLine)
1072 // Key := !Alnum Space !":="
1073 auto ScanStart = Cursor;
1075 bool Multiline = Ending();
1076 GRAMMAR_RUN(Scan(More));
1077 if(Context.LinePrefix && Multiline && Cursor.Token!=token(u8"{")) {
1078 auto LinePrefixStart=Cursor;
1079 if(Eat(u8"&")) {
1080 Gen.LinePrefix(More,Snip(LinePrefixStart));
1082 if(TokenSet.Has(Cursor.Token))
1083 return Gen.CaptureAppend(Capture,More), Cursor.Token;
1084 }
1085 }
1086 else if(TokenSet.Has(Cursor.Token))
1087 return Gen.CaptureAppend(Capture,More), Cursor.Token;
1088 return Cursor=ScanStart, token::None(); //backtrack but could cache
1089 }
1090
1091 // Constants and base expressions.
// Reads the run of hex digits at the cursor and returns their value.
// MaxDigits: most digits accepted. MaxValue: largest value accepted. OnError: parser member called to build the error.
// Returns OnError's result when a further hex digit would exceed MaxDigits or MaxValue, or would wrap nat;
// the offending digit is not consumed. Returns 0, consuming nothing, if the cursor is not on a hex digit.
1092 result_t<nat> ParseHex(nat MaxDigits,nat MaxValue,error_t(parser::*OnError)()) {
1093 nat i=0;
1094 while(IsHex(Cursor[0])) {
// Post-decrement: the test sees the remaining budget before this digit is counted.
1095 if(MaxDigits-->0) {
1096 auto i0=i;
1097 i=i*16+DigitValue(Cursor[0]);
// i/16==i0 only holds if the multiply-add did not wrap around.
1098 if(i<=MaxValue && i/16==i0) {
1099 Next(1);
1100 continue;
1101 }
1102 }
1103 return (this->*OnError)();
1104 }
1105 return i;
1106 }
// Returns error S15 if an alphanumeric character sits at the cursor, or directly after a '.' at the cursor.
// GotDot doubles as the index (0 or 1) of the character to inspect. Consumes nothing.
1107 result_t<nothing> DisallowDotAlnum() {
1108 bool GotDot=Cursor[0]=='.';
1109 if(IsAlnum(Cursor[GotDot]))
1110 return S15();
1111 return nothing{};
1112 }
// Returns error S15 if a digit sits at the cursor, or directly after a '.' at the cursor.
// Unlike DisallowDotAlnum, letters are accepted here. Consumes nothing.
1113 result_t<nothing> DisallowDotNum() {
1114 bool GotDot=Cursor[0]=='.';
1115 if(IsDigit(Cursor[GotDot]))
1116 return S15();
1117 return nothing{};
1118 }
1120 // Exp := [('e'|'E') ['+'|'-'] Digits] !(('e'|'E') ('+'|'-'|Digit))
1121 // Units := [Alpha {Alpha}] !Alpha
1122 // Num := !(("0b"|"0o"|"0u"|"0x") Hex) Digits ['.' Digits] Exp Units) !('.' Digits)
1123 // | ("0x" Hex {Hex} !('.' Alnum)
1124 auto Start=Cursor;
1125 GRAMMAR_ASSERT(IsDigit(Cursor[0]));
1126 if(Cursor[0]=='0' && Cursor[1]=='x' && IsHex(Cursor[2])) {
1127 GRAMMAR_ASSERT(Cursor[0]=='0'&&Cursor[1]=='x'&&IsHex(Cursor[2]));
1128 Next(2);
1129 do {Next(1);} while(IsHex(Cursor[0]));
1130 // Could use `DisallowDotNum()` which would then permit extension function on hex literals - `0xff.ShiftRight()`
1131 // Can still wrap hex literals with parentheses - `(0xff).ShiftRight()`
1132 GRAMMAR_RUN(DisallowDotAlnum());
1133 return Gen.NumHex(Snip(Start),text(Start.Pos+2,Cursor.Pos));
1134 }
1135 while(IsDigit(Cursor[0]))
1136 Next(1);
1137 text Digits(Start.Pos,Cursor.Pos),FractionalDigits(Cursor.Pos+1,Cursor.Pos+1);
1138 if(Cursor[0]=='.' && IsDigit(Cursor[1])) {
1139 Next(2);
1140 while(IsDigit(Cursor[0]))
1141 Next(1);
1142 FractionalDigits.Stop=Cursor.Pos;
1143 }
1144 text ExponentSign,Exponent;
1145 if(Cursor[0]=='e' || Cursor[0]=='E') {
1146 int64 HasExponentSign = int64(Cursor[1]=='+' || Cursor[1]=='-');
1147 if(IsDigit(Cursor[1+HasExponentSign])) {
1148 ExponentSign=text(Cursor.Pos+1,Cursor.Pos+1+HasExponentSign);
1150 Exponent.Start=Cursor.Pos;
1151 while(IsDigit(Cursor[0]))
1152 Next(1);
1153 Exponent.Stop=Cursor.Pos;
1154 }
1155 }
1156 GRAMMAR_LET(Result,Gen.Num(Snip(Start),Digits,FractionalDigits,ExponentSign,Exponent));
1157 if(IsAlpha(Cursor[0])) {
1158 auto Pos0=Cursor.Pos;
1159 do Next(1);
1160 while(IsAlnum(Cursor[0]));
1161 GRAMMAR_SET(Result,Gen.Units(Snip(Start),Result,text(Pos0,Cursor.Pos)));
1162 }
1163 // Disallow extra dot digit and allow dot alpha so extension functions ['.' Ident] can be called on number literals
1164 GRAMMAR_RUN(DisallowDotNum());
1165 return Result;
1166 }
1167 result_t<syntax_t> CharLit() {
1168 // Special := '\'|'{'|'}'|'#'|'<'|'>'|'&'|'~'
1169 // CharEsc := '\' ('r'|'n'|'t'|'''|'"'|Special)
1170 // CharLit := ''' Printable ''' !''' | ''' CharEsc '''
1171 GRAMMAR_ASSERT(Cursor[0]=='\'');
1172 auto Start=Cursor;
1173 Next(1);
1175 if(!n)
1176 return S30();
1177 auto Char32 = EncodedChar32(Cursor.Pos,n);
1178 auto Backslash = Cursor[0]=='\\' && Cursor[1] && Cursor[2]=='\'';
1179 if(Backslash) {
1180 Next(1);
1181 if(IsStringBackslashLiteral(Cursor[0],Cursor[1])) {
1182 Char32=char32(Cursor[0]=='r'? '\r': Cursor[0]=='n'? '\n': Cursor[0]=='t'? '\t': Cursor[0]);
1183 Backslash=1;
1184 Next(n);
1185 }
1186 else return S34();
1187 }
1188 else Next(n);
1189 GRAMMAR_RUN(Require(u8"'",&parser::S31));
1190 return Gen.Char32(Snip(Start),Char32,false,Backslash);
1191 }
// Parses a "0o" numbered character constant and returns its value as char8.
// Precondition (GRAMMAR_ASSERT): the cursor is on "0o" followed by a hex digit.
// Errors: S18 from ParseHex for more than 2 digits or a value above 0xFF; S16 if an alphanumeric character follows.
1192 result_t<char8> Char8() {
1193 // Char8 := "0o" (Hex) [Hex] !Alnum
1194 GRAMMAR_ASSERT(Cursor[0]=='0'&&Cursor[1]=='o'&&IsHex(Cursor[2]));
// Skip the "0o" prefix.
1195 Next(2);
1196 GRAMMAR_LET(n,ParseHex(2,0xFFULL,&parser::S18));
1197 if(IsAlnum(Cursor[0]))
1198 return S16();
1199 return char8(n);
1200 }
// Parses a "0u" numbered code point constant and returns its value as char32.
// Precondition (GRAMMAR_ASSERT): the cursor is on "0u" followed by a hex digit.
// Errors: S19 from ParseHex for more than 6 digits or a value above 0x10FFFF; S16 if an alphanumeric character follows.
1201 result_t<char32> Char32() {
1202 // Char32 := "0u" ("10" | Hex) [Hex] [Hex] [Hex] [Hex] !Alnum
1203 GRAMMAR_ASSERT(Cursor[0]=='0'&&Cursor[1]=='u'&&IsHex(Cursor[2]));
// Skip the "0u" prefix.
1204 Next(2);
1205 GRAMMAR_LET(n,ParseHex(6,0x10FFFFULL,&parser::S19));
1206 if(IsAlnum(Cursor[0]))
1207 return S16();
1208 return char32(n);
1209 }
// Parses an identifier and returns its text, from the first character through any quoted suffix.
// Precondition (GRAMMAR_ASSERT): the cursor is on an Alpha character.
// If a ' follows the Alpha {Alnum} part, characters are consumed while IsIdentifierQuotable accepts them,
// and a closing ' is then required (error S24 through Require otherwise).
1210 result_t<text> Ident() {
1211 // Ident := Alpha {Alnum} !Alnum ["'" {!('<#'|'#>'|'\'|'{'|'}'|'"'|''') 0o20-0o7E} "'"]
1212 GRAMMAR_ASSERT(IsAlpha(Cursor[0]));
1213 auto Pos0=Cursor.Pos;
1214 do Next(1);
1215 while(IsAlnum(Cursor[0]));
// No opening quote: the plain identifier is the whole result.
1216 if(!Eat(u8"'"))
1217 return text(Pos0,Cursor.Pos);
1218 // Ensure not reading past string and determine if quotable
1219 while((Cursor[0] != '\0') && IsIdentifierQuotable(Cursor[0], Cursor[1]))
1220 Next(1);
1221 GRAMMAR_RUN(Require(u8"'",&parser::S24));
1222 return text(Pos0,Cursor.Pos);
1223 }
// Parses a path literal and returns the text it spans, from the leading '/' to the cursor.
// Errors: S25 if the leading '/' is missing or a '/' remains after the last segment, S68 when the input looks like
// a "//" comment, S26 from Label, S20 when a '/' segment has no identifier.
1224 result_t<text> Path() {
1225 // Path := '/' Label ('@' Label | !'@') {'/' ['(' Path ':)'] Ident} !'/'
1226 auto Start=Cursor;
1227 GRAMMAR_RUN(Require(u8"/",&parser::S25));
// NOTE(review): if Require consumed the '/', Cursor[-1]=='/' is always true here, so "/ " alone yields S68 - confirm this is intended.
1228 if(Cursor[0]=='/' || (Cursor[0]==' ' && Cursor.Pos>InputString && Cursor[-1]=='/'))
1229 return S68();
1230 GRAMMAR_RUN(Label(u8"/"));
1231 if(Eat(u8"@"))
1232 GRAMMAR_RUN(Label(u8"@"));
// Each further '/' introduces a segment: an optional parenthesized nested path closed by ":)", then a required identifier.
1233 while(Eat(u8"/")) {
// What tracks the most recent token consumed, for the S20 message.
1234 text What=u8"/";
1235 if(Eat(u8"(")) {
1236 GRAMMAR_RUN(Path());
1237 GRAMMAR_RUN(Require(u8":)",&parser::S25));
1238 What=u8":)";
1239 }
1240 if(IsAlpha(Cursor[0])) {
1241 GRAMMAR_RUN(Ident());
1242 continue;
1243 }
1244 return S20(What);
1245 }
1246 if(Cursor[0]!='/')
1247 return text(Start.Pos,Cursor.Pos);
1248 return S25(u8"/");
1249 }
// Parses a path label and returns its text.
// The first character must be alphanumeric; following characters may also be '-' or '.'.
// What: the text the label should follow, quoted in error S26 when no label is present.
1250 result_t<text> Label(text What) {
1251 // Label := Alnum {Alnum|'-'|'.'} !(Alnum|'-'|'.')
1252 auto Pos0=Cursor.Pos;
1253 if(IsAlnum(Cursor[0])) {
1254 Next(1);
1255 while(IsAlnum(Cursor[0]) || Cursor[0]=='-' || Cursor[0]=='.')
1256 Next(1);
1257 return text(Pos0,Cursor.Pos);
1258 }
1259 return S26(What);
1260 }
1261
1262 // Text processing.
1263 result_t<capture_t> LineCmt() {
1264 // LineCmt := '#' !'>' {Text} Ending
1265 GRAMMAR_ASSERT(Cursor[0]=='#');
1266 Next(1);
1267 capture_t Capture;
1269 if(Ending())
1270 return Capture;
1271 else
1272 return S03();
1273 }
1274 result_t<capture_t> BlockCmt() {
1275 // BlockCmt := "<#" !'>' {Text|NewLine} !'<' "#>"
1276 GRAMMAR_ASSERT(Cursor[0]=='<'&&Cursor[1]=='#'&&Cursor[2]!='>');
1277 auto Start=Cursor;
1278 Next(2);
1279 capture_t Capture;
1281 if(Cursor[0]=='#' && Cursor[1]=='>')
1282 return Next(2), Capture;
1283 else if(Cursor[0]==0)
1284 return Cursor=Start, S04();
1285 else
1286 return S02();
1287 }
1288 result_t<capture_t> IndCmt() {
1289 // IndCmt := "<#>" {Text} Ind {Text|Line} Ded
1290 GRAMMAR_ASSERT(Cursor[0]=='<'&&Cursor[1]=='#'&&Cursor[2]=='>');
1291 Next(3);
1292 capture_t Capture;
1294 if(Ending()) {
1297 GRAMMAR_RUN(Ded(SavedContext,&parser::S06));
1298 // NOTE: (yiliang.siew) We don't want to snip a newline here, because that means that an indcmt like:
1299 /*
1300 * <#>indcmt
1301 * indcmt_frag
1302 * stub{}
1303 *
1304 */
1305 // Ends up getting an extra newline snipped as part of its comment string capture, which wouldn't make sense
1306 // since indcmts must always have a newline after anyway.
1307 // GRAMMAR_RUN(Space(Capture,place::IndCmt));
1308 return Capture;
1309 }
1310 else return S06();
1311 }
1312 template<place ParsePlace> result_t<nothing> Text(capture_t& Capture,place GenPlace=ParsePlace) {
1313 // Text := Printable | BlockCmt | "<#>"
1314 // LineCmt := '#' !'>' {Text} Ending
1315 // BlockCmt := "<#" !'>' {Text|NewLine} !'<' "#>"
1316 // IndCmt := "<#>" {Text} Ind {Text|Line} Ded
1317 // Space := {0o09 | 0o20 | Comment}
1318 // String := '"' {.. | CharEsc | !('\'|'{'|'}'|'"') Text} '"'
1319 // Content := {.. | Comment | Line | CharEsc | .. | !Special Text}
1320 // CharEsc := '\' ('r'|'n'|'t'|'''|'"'|Special)
1321 for(;;) {
1322 auto Start=Cursor;
1323 for(nat n; (n=EncodedLength<ParsePlace>(Cursor.Pos))!=0;)
1324 Next(n);
1325 if(Cursor.Pos!=Start.Pos)
1326 Gen.Text(Capture,Snip(Start),GenPlace);
1327 auto SpecialStart=Cursor;
1328 switch(Cursor[0]) {
1329 case '\r': case '\n':
1332 if(Ending())
1333 return nothing{};
1334 continue;
1335 }
1336 else if constexpr(ParsePlace==place::BlockCmt) {
1338 continue;
1339 }
1340 else return nothing{};
1341 case '#':
1342 if(Cursor[1]!='>') {
1343 GRAMMAR_LET(Commentary,LineCmt());
1344 Gen.LineCmt(Capture,Snip(SpecialStart),GenPlace,Commentary);
1345 continue;
1346 }
1347 else if constexpr(ParsePlace==place::BlockCmt)
1348 return nothing{};
1349 else
1350 return S05();
1351 case '<':
1352 if(Cursor[1]!='#') {
1353 return nothing{};
1354 }
1355 else if(Cursor[2]!='>') {
1356 GRAMMAR_LET(Commentary,BlockCmt());
1357 Gen.BlockCmt(Capture,Snip(SpecialStart),GenPlace,Commentary);
1358 continue;
1359 }
1361 GRAMMAR_LET(Commentary,IndCmt());
1362 Gen.IndCmt(Capture,Snip(SpecialStart),GenPlace,Commentary);
1363 continue;
1364 }
1365 else {
1366 Next(3);
1367 Gen.Text(Capture,Snip(SpecialStart),GenPlace);
1368 continue;
1369 }
1370 case '\\':
1371 // Parse a constant escape.
1372 // Special := '\'|'{'|'}'|'#'|'<'|'>'|'&'|'~'
1373 // CharEsc := '\' ('r'|'n'|'t'|'''|'"'|Special)
1375 Next(1);
1376 if(Cursor[0] && IsStringBackslashLiteral(Cursor[0],Cursor[1])) {
1377 auto Backslashed = Cursor[0];
1378 Next(1);
1379 Gen.StringBackslash(Capture,Snip(SpecialStart),GenPlace,Backslashed);
1380 continue;
1381 }
1382 else return S34();
1383 }
1384 default:
1385 return nothing{};
1386 }
1387 }
1388 }
// Parses a braced interpolation and returns the block produced by List.
// Precondition (GRAMMAR_ASSERT): the cursor is on '{'.
// The opening brace is skipped, List parses up to "}", and RequireClose then checks the closing brace;
// both are given S86 as their error callback.
1389 result_t<block_t> Interp() {
1390 // Interp := '{' List '}'
1391 GRAMMAR_ASSERT(Cursor[0]=='{');
1392 auto Start=Cursor;
1393 Next(1);
1394 GRAMMAR_LET(Block0,List(u8"}",&parser::S86,Cursor,capture_t(),punctuation::None,Cursor));
1395 GRAMMAR_RUN(RequireClose(Start,u8"{",u8"}",&parser::S86));
1396 return Block0;
1397 }
1398 result_t<block_t> Ampersand() {
1399 // Ampersand := push; parse LinePrefix='&'; Space Def (';'|Ending); pop
1400 GRAMMAR_ASSERT(Cursor[0]=='&');
1401 Next(1);
1402 auto ExprStart = Cursor;
1403 GRAMMAR_LET(Leading,Space());
1404 auto SavedContext = Context;
1405 Context.LinePrefix = true;
1406 GRAMMAR_LET(Block0,WhenExpr(u8"&",prec::Def,prec::Def,nullptr,Leading,[&](expr& Expr)->result_t<block_t> {
1407 ApplyTrailing(Expr,true);
1408 auto SemicolonStart = Cursor;
1409 bool Semicolon = Eat(u8";");
1410 auto Block0 = SingletonBlock(ExprStart,Expr);
1411 if(!Ending() && !Semicolon)
1412 return S57();
1413 if(Semicolon)
1414 Gen.Semicolon(Block0.ElementsTrailing,Snip(SemicolonStart));
1415 ApplyTrailing(Block0,Cursor);
1416 return Block0;
1417 },AllTokens));
1419 return Block0;
1420 }
// Parses string or markup content into a list of splices and returns it.
// Place: selects the Text<Place> variant and is forwarded to Gen.StringInterpolate.
// TextStart: cursor where the current literal run began. Leading: capture that Text appends to (taken by value).
// Each pass collects a literal run, then handles one special construct; the loop ends, returning the splices,
// at any character that is not '{', '&' or a '<' not followed by '/'.
1421 template<place Place> result_t<syntaxes_t> String(cursor TextStart,capture_t Leading=capture_t()) {
1422 // String := '"' {Interp | CharEsc | !('\'|'{'|'}'|'"') Text} '"'
1423 // Content := {Interp | CharEsc | Markup | Ampersand | Comment | Line | !Special Text}
1424 syntaxes_t Splices;
1425 for(;;) {
1426 GRAMMAR_RUN(Text<Place>(Leading));
// Only emit a literal splice if the cursor has moved past TextStart.
1427 if(Cursor.Pos!=TextStart.Pos) {
1428 GRAMMAR_LET(S,Gen.StringLiteral(Snip(TextStart),Leading));
1429 Gen.SyntaxesAppend(Splices,S);
1430 }
1431 auto SpecialStart=Cursor;
1432 switch(Cursor[0]) {
// Braced interpolation; the third StringInterpolate argument is 1 here and 0 for the '&' form below.
1433 case '{': {
1434 GRAMMAR_LET(Block0,Interp());
1435 GRAMMAR_LET(S,Gen.StringInterpolate(Snip(SpecialStart),Place,1,Block0));
1436 Gen.SyntaxesAppend(Splices,S);
1437 break;
1438 }
1439 case '&': {
1440 GRAMMAR_LET(Block0,Ampersand());
1441 GRAMMAR_LET(S,Gen.StringInterpolate(Snip(SpecialStart),Place,0,Block0));
1442 Gen.SyntaxesAppend(Splices,S);
1443 break;
1444 }
1445 case '<':
1446 // Markup := '<' Tags ..
1447 // Tags := Space (!'/' ..) ..
// "</" is an end tag, not nested markup: fall through and return what has been collected.
1448 if(Cursor[1]!='/') {
1449 GRAMMAR_LET(e,Markup());
1450 Gen.SyntaxesAppend(Splices,e);
1451 break;
1452 }
1453 [[fallthrough]];
1454 default:
1455 return Splices;
1456 }
// Start a fresh literal run after the construct just handled.
1457 TextStart=Cursor;
1458 Leading=capture_t();
1459 }
1460 }
1461
1462 // Markup content.
1463 result_t<syntax_t> Contents(bool TrimLeading) {
1464 // Contents := Scan (Content | '~' Content {'~' Content})
1465 auto Start=Cursor;
1466 GRAMMAR_LET(Leading,Space(place::Content)); // If TrimLeading, trim leading [Space NewLine].
1467 if(TrimLeading && Ending())
1468 Gen.MarkupTrim(Leading);
1469 GRAMMAR_RUN(Scan(Leading,place::Content));
1470 if(Cursor[0]!='~') {
1472 if(Cursor[0]=='~')
1473 return S58();
1474 return Gen.Content(Snip(Start),Splices);
1475 }
1476 else {
1477 Next(1);
1478 Gen.MarkupTrim(Leading); // Trim everything before ~.
1479 syntaxes_t Results;
1480 do {
1481 auto ElementStart=Cursor;
1483 GRAMMAR_LET(S,Gen.Content(Snip(ElementStart),Splices));
1484 Gen.SyntaxesAppend(Results,S);
1485 }
1486 while(Eat(u8"~"));
1487 return Gen.Contents(Snip(Start),Leading,Results);
1488 }
1489 }
1490 result_t<syntax_t> Trimmed(bool TrimLeading) {
1491 // We push and set TrimInd to LineInd so markup can precisely trim according to LineStart.
1492 auto SavedContext = Context;
1493 Context.TrimInd = Cursor.LineStart;
1494 Context.Nest = true;
1495 GRAMMAR_LET(Result,Contents(TrimLeading));
1497 return Result;
1498 }
1499
1500 // Blocks.
// Builds a block_t over Snippet whose Elements holds the single Syntax.
// PunctuationLeading and Punctuation are stored on the block (defaults: empty capture, punctuation::None).
1501 block_t SingletonBlock(const snippet& Snippet,const syntax_t& Syntax,const capture_t& PunctuationLeading=capture_t(),punctuation Punctuation=punctuation::None) {
1502 block_t Block0(Snippet);
1503 Block0.PunctuationLeading = PunctuationLeading;
1504 Block0.Punctuation = Punctuation;
1505 Gen.SyntaxesAppend(Block0.Elements,Syntax);
1506 return Block0;
1507 }
// Overload that wraps a parsed expression: the snippet comes from SnipFinished(BlockStart,Expr) and the syntax from *Expr.
// Side effect: Expr.Trailing is moved into the new block's BlockTrailing.
1508 block_t SingletonBlock(const cursor& BlockStart,expr& Expr,const capture_t& PunctuationLeading=capture_t(),punctuation Punctuation=punctuation::None) {
1509 auto Block0=SingletonBlock(SnipFinished(BlockStart,Expr),*Expr,PunctuationLeading,Punctuation);
1510 Block0.BlockTrailing.MoveFrom(Expr.Trailing);
1511 return Block0;
1512 }
1513 result_t<block_t> IndList(cursor Start,const capture_t& PunctuationLeading,punctuation Punctuation,cursor LeadingStart,const capture_t& Leading=capture_t()) {
1514 // Ind List Ded
1516 GRAMMAR_LET(Block0,List(u8"",&parser::S88,Start,PunctuationLeading,Punctuation,LeadingStart,Leading));
1517 GRAMMAR_RUN(Ded(SavedContext,&parser::S88void));
1518 GRAMMAR_RUN(SpaceTrailing(Block0.BlockTrailing));
1519 return Block0;
1520 }
// Shared implementation behind Block, BraceInd, KeyBlock, KeyBlockDefs and WhenBraceCall; dispatches on Cursor.Token.
// What: text naming the preceding construct; used in error S71 and forwarded to nested parses.
// Prec: precedence for the plain-expression fallback in the default case; prec::Nothing disables that fallback.
// OuterExpr: passed to WhenExpr as the enclosing expression.
// BlockStart: cursor where the block began, used for snippets.
// PunctuationLeading: capture preceding the block punctuation (by value; ScanKey may append to it).
// AllowOpen: permits the '.' and ':' forms. AllowInd: permits an indented list after a newline or end.
// AllowCommas: the default case parses with Commas instead of a single expression.
// Fails: if non-null, a missing block sets *Fails=true and returns an empty block_t instead of error S71.
1521 result_t<block_t> BlockHelper(text What,prec Prec,expr& OuterExpr,cursor BlockStart,capture_t PunctuationLeading,
1522 bool AllowOpen,bool AllowInd,bool AllowCommas,bool* Fails=nullptr) {
1523 // Brace := Scan '{' List '}' Space
1524 // Block := Brace | DotSpace Space Def Space | (DotSpace | ':') Space Ind List Ded
1525 // BraceInd := Brace | Ind List Ded
1526 // DotSpace := '.' (0o09 | 0o20 | Ending) Space
1527 switch(nat8(Cursor.Token)) {
// At a line end: look ahead with ScanKey for a token in BracePostfixes. If none is found, parse an indented
// list when AllowInd, otherwise fail; if one is found, continue into the brace case below.
1528 case token::NewLine(): case token::End(): {
1529 GRAMMAR_LET(ScanToken,ScanKey(PunctuationLeading,BracePostfixes));
1530 if(!ScanToken) {
1531 if(AllowInd)
1532 return IndList(BlockStart,PunctuationLeading,punctuation::Ind,Cursor);
1533 goto bad;
1534 }
1535 [[fallthrough]];
1536 }
// Braced block: consume the token, parse the list up to "}", require the close, then gather trailing space.
1537 case token(u8"{"): {
1538 auto BraceStart=Cursor;
1539 EatToken();
1540 GRAMMAR_LET(Block0,List(u8"}",&parser::S84,Cursor,PunctuationLeading,punctuation::Braces,Cursor));
1541 GRAMMAR_RUN(RequireClose(BraceStart,u8"{",u8"}",&parser::S84));
1542 Block0.BlockSnippet=Snip(BlockStart);
1543 GRAMMAR_RUN(SpaceTrailing(Block0.BlockTrailing));
1544 return Block0;
1545 }
// Dot form: only when AllowOpen and the '.' is followed by a space character; parses one prec::Def expression.
1546 case token(u8"."): {
1547 if(AllowOpen && (IsSpace(Cursor[1]) /*|| IsEnding(Cursor[1])*/)) {
1548 EatToken();
1549 //auto MiddleStart=Cursor;
1550 GRAMMAR_LET(Middle,Space());
1551 /*if(Ending())
1552 return IndList(BlockStart,PunctuationLeading,punctuation::Dot,MiddleStart,Middle);*/
1553 return WhenExpr(What,prec::Def,prec::Def,&OuterExpr,Middle,[&](expr& Right)->result_t<block_t> {
1554 return SingletonBlock(BlockStart,Right,PunctuationLeading,punctuation::Dot);
1555 });
1556 }
1557 goto bad;
1558 }
// Colon form: when AllowOpen and the colon is followed (after space) by an Ending, parse an indented list;
// otherwise restore the cursor to the colon and continue with the default case.
1559 case token(u8":"): {
1560 if(AllowOpen) {
1561 auto ColonStart=Cursor;
1562 EatToken();
1563 auto MiddleStart=Cursor;
1564 GRAMMAR_LET(Middle,Space());
1565 if(Ending())
1566 return IndList(BlockStart,PunctuationLeading,punctuation::Colon,MiddleStart,Middle);
1567 Cursor=ColonStart; //backtrack colon and space, then fall through.
1568 }
1569 [[fallthrough]];
1570 }
// Fallback: a comma list or a single expression at Prec, unless Prec is prec::Nothing.
1571 default:
1572 if(Prec!=prec::Nothing) {
1573 if(AllowCommas)
1574 return Commas(What,Prec,BlockStart,PunctuationLeading,&parser::S71);
1575 else
1576 return WhenExpr(What,Prec,Prec,&OuterExpr,PunctuationLeading,[&](expr& Right)->result_t<block_t> {
1577 return SingletonBlock(BlockStart,Right);
1578 });
1579 }
// No block could be parsed: report S71, or signal through Fails when the caller supplied it.
1580 bad:
1581 if(!Fails)
1582 return S71(What);
1583 else
1584 return *Fails=true, block_t{};
1585 }
1586 }
// Parses a block through BlockHelper with prec::Nothing, the '.'/':' forms allowed, and no indented-list or comma forms.
// Fails: set to true by BlockHelper, instead of producing error S71, when no block is present.
1587 result_t<block_t> Block(text What,expr& OuterExpr,cursor BlockStart,const capture_t& PunctuationLeading,bool& Fails) {
1588 // Block := Brace | DotSpace Space Def Space | (DotSpace | ':') Space Ind List Ded
1589 return BlockHelper(What,prec::Nothing,OuterExpr,BlockStart,PunctuationLeading,true,false,false,&Fails);
1590 }
// Parses leading space, then a block through BlockHelper with the indented-list form allowed
// and the '.'/':' and comma forms disallowed. The block starts at the cursor position on entry.
1591 result_t<block_t> BraceInd(text What,prec Prec,expr& OuterExpr) {
1592 // BraceInd := Brace | Ind List Ded
1593 auto BlockStart=Cursor;
1594 GRAMMAR_LET(PunctuationLeading,Space());
1595 return BlockHelper(What,Prec,OuterExpr,BlockStart,PunctuationLeading,false,true,false);
1596 }
// Parses the block following a keyword token and records that token on the result.
// Token is passed to BlockHelper as What, then stored with TokenLeading on the returned block.
// BlockHelper is called with the '.'/':' forms allowed and the indented-list and comma forms disallowed.
1597 result_t<block_t> KeyBlock(prec Prec,expr& OuterExpr,cursor BlockStart,const capture_t& TokenLeading,text Token,const capture_t& PunctuationLeading) {
1598 // KeyBlock := Block
1599 GRAMMAR_LET(Block0,BlockHelper(Token,Prec,OuterExpr,BlockStart,PunctuationLeading,true,false,false));
1600 Block0.Token = Token;
1601 Block0.TokenLeading = TokenLeading;
1602 return Block0;
1603 }
// Like KeyBlock, but parses its own leading space, fixes the precedence at prec::Def and enables
// the comma-list form in BlockHelper. Token and TokenLeading are stored on the returned block.
1604 result_t<block_t> KeyBlockDefs(expr& OuterExpr,cursor BlockStart,const capture_t& TokenLeading,text Token) {
1605 // Defs := Def {Space ',' Scan Def}
1606 GRAMMAR_LET(PunctuationLeading,Space());
1607 GRAMMAR_LET(Block0,BlockHelper(Token,prec::Def,OuterExpr,BlockStart,PunctuationLeading,true,false,true));
1608 Block0.Token = Token;
1609 Block0.TokenLeading = TokenLeading;
1610 return Block0;
1611 }
// Parses the operand of a prefix construct as either a braced block or an expression, then calls F on the resulting block.
// After leading space, a '{' or NewLine token goes through BlockHelper (no '.'/':', indented-list or comma forms);
// anything else is parsed by WhenExpr at Prec and wrapped with SingletonBlock.
// F: callback taking the block and returning result_t<nothing>; its result is this function's result.
1612 template<class f> result_t<nothing> WhenBraceCall(const char8* What,prec Prec,expr& OuterExpr,const f& F) {
1613 // Brace := Scan '{' List '}' Space
1614 // Prefix := Call | .. Space (Brace | Prefix)
1615 // Takes a callback because things like +a<b preemptively invoke OnFinish.
1616 auto BlockStart=Cursor;
1617 GRAMMAR_LET(PunctuationLeading,Space());
1618 if(Cursor.Token==token(u8"{") || Cursor.Token==token::NewLine()) {
1619 GRAMMAR_LET(RightBlock,BlockHelper(What,Prec,OuterExpr,BlockStart,PunctuationLeading,false,false,false));
1620 return F(RightBlock);
1621 }
1622 else return WhenExpr(What,Prec,Prec,&OuterExpr,PunctuationLeading,[&](expr& RightExpr)->result_t<nothing> {
1623 auto RightBlock=SingletonBlock(BlockStart,RightExpr);
1624 return F(RightBlock);
1625 });
1626 }
1627
1628 // Qualified identifiers.
// Finishes a qualified identifier after its "(...:)" qualifier has been parsed into Block0 (see QualIdent).
// Collects space into Block0.PunctuationTrailing, marks Block0 as punctuation::Qualifier spanning from Start,
// then requires an identifier: on success stores it in Target.MarkupTag and returns Gen.QualIdent; otherwise error S23.
1629 result_t<syntax_t> QualIdentQualified(expr& Target,const cursor& Start,block_t& Block0) {
1630 GRAMMAR_RUN(Space(Block0.PunctuationTrailing));
1631 Block0.BlockSnippet = Snip(Start);
1632 Block0.Punctuation = punctuation::Qualifier;
1633 if(IsAlpha(Cursor[0])) {
1634 GRAMMAR_LET(Id,Ident());
1635 Target.MarkupTag=Id;
1636 return Gen.QualIdent(Snip(Start),Block0,Id);
1637 }
1638 else return S23(u8":)");
1639 }
// Parses a plain identifier, a qualified identifier, or (when AllowParenthesis) a parenthesized list.
// What: text quoted in error S20 when neither an identifier nor '(' is at the cursor.
// Target: its MarkupTag is set to the identifier text whenever one is parsed.
// AllowParenthesis: if the list is not closed by ":)", accept a plain ")" and return Gen.Parenthesis; otherwise error S23.
1640 result_t<syntax_t> QualIdent(text What,expr& Target,bool AllowParenthesis) {
1641 // QualIdent := ['(' List ':)' Space] Ident
1642 auto Start=Cursor;
1643 if(IsAlpha(Cursor[0])) {
1644 GRAMMAR_LET(Id,Ident());
1645 Target.MarkupTag=Id;
1646 return Gen.Ident(Snip(Start),Id,u8"",u8"");
1647 }
1648 else if(Cursor[0]=='(') {
1649 EatToken();
1650 GRAMMAR_LET(Block0,List(u8")",&parser::S81,Cursor,capture_t(),punctuation::Parens,Cursor));
// ":)" closes a qualifier; the identifier it qualifies must follow.
1651 if(Eat(u8":)"))
1652 return QualIdentQualified(Target,Start,Block0);
1653 else if(AllowParenthesis) {
1654 GRAMMAR_RUN(RequireClose(Start,u8"(",u8")",&parser::S81));
1655 Block0.BlockSnippet = Snip(Start);
1656 return Gen.Parenthesis(Block0);
1657 }
1658 else return S23(u8":)");
1659 }
1660 return S20(What);
1661 }
1662
1663 // Macro invocations and constructs that lead with same syntax like '(', '<', 'with'.
// One pending call, kept as a node in a singly linked chain through OuterCall.
// invoke::UpdateLastCall appends a node by pointing the previous last node's OuterCall at it.
1664 struct call {
// Presumably the text used to describe this call in errors - TODO confirm against the code that fills it in.
1665 text CallWhat;
1666 cursor CallTrailingStop;
1667 mode CallMode;
// Reference to the call's parameter block; invoke::OnFinish reads its BlockTrailing.
1668 block_t& CallParameter;
1669 call* OuterCall = nullptr; // Initialized to keep static analysis happy
1670 };
// Expression state for one potential macro invocation: a chain of pending calls plus up to three
// clause blocks. OnFinish decides whether the accumulated pieces form a macro invocation, plain
// calls, or an error.
1671 struct invoke: expr {
// Description passed to error S76 when a reserved word is not followed by a macro.
1672 text What;
// Token that introduced the invocation (token::None() when none is required).
1673 token StartToken;
// Token sets merged into AllowPostfixes by Invoke() as clauses are parsed.
1674 token_set InTokens, PostTokens;
// Head and tail of the pending call chain (linked through call::OuterCall).
1675 call *FirstCall, *LastCall;
// Most recent parenthesized call, if any; Invoke() uses it to choose the clause index for a following block.
1676 call* Of;
// Clause blocks handed to Gen.Invoke in OnFinish; null entries mean the clause is absent.
1677 block_t* Clauses[3];
// Most recently stored clause that has a non-empty snippet.
1678 block_t* PriorClause;
1679 invoke(text What0,expr& OuterExpr0,cursor Start0,token StartToken0,token_set InTokens0,token_set PostTokens0,call* FirstCall0=nullptr,call* LastCall0=nullptr):
// NOTE(review): the listing's embedded numbering skips a line here; the initializer that consumes
// OuterExpr0 and Start0 (presumably the expr base) is not visible — confirm against the original header.
1681 What(What0), StartToken(StartToken0),
1682 InTokens(InTokens0), PostTokens(PostTokens0),
1683 FirstCall(FirstCall0), LastCall(LastCall0),
1684 Of(nullptr), Clauses{nullptr,nullptr,nullptr}, PriorClause(nullptr) {}
// Appends NewCall to the pending chain, making it the head as well when the chain was empty.
1685 void UpdateLastCall(call* NewCall) {
1686 if(LastCall)
1687 LastCall->OuterCall = NewCall;
1688 else
1689 FirstCall = NewCall;
1690 LastCall = NewCall;
1691 }
// Resolves the accumulated state once no further postfix applies.
1692 result_t<nothing> OnFinish(parser& Parser) override {
1693 Parser.CheckToken();
// Trailing starts at the last pending call's trailing, else the prior clause's, else the current cursor.
1694 this->Trailing = trailing{
1695 LastCall
1696 ? *LastCall->CallParameter.BlockTrailing.TrailingStart
1697 : PriorClause
1698 ? *PriorClause->BlockTrailing.TrailingStart:
1699 Parser.Cursor,
1700 capture_t()};
1701 GRAMMAR_RUN(expr::OnFinish(Parser));
// Case 1: at least one clause was stored, so this is a macro invocation.
1702 if(Clauses[0]) {
1703 GRAMMAR_ASSERT(PriorClause);
1704 // Generate this macro invocation.
1705 GRAMMAR_RUN(Parser.UpdateFrom(*this->OuterExpr,PriorClause->BlockTrailing,Parser.Gen.Invoke(
1706 Parser.SnipFinished(this->Start,*PriorClause),
1707 Parser.ApplyTrailing(*this->OuterExpr),
1708 *Clauses[0],Clauses[1],Clauses[2])));
1709
1710 // Handle remaining calls on the stack now with another Invoke.
1711 if(!FirstCall) // Disable this to check soundness of logic below.
1712 return nothing{};
// Calls that arrived after the last clause are processed by a fresh invoke that inherits the chain.
1713 invoke NewTarget{u8"nested macro invocation",*this->OuterExpr,this->Start,token::None(),
1714 token_set{u8"do"},token_set{u8"until",u8"catch"},FirstCall,LastCall};
1715 if(!this->ExprStop)
1716 return Parser.Invoke(NewTarget,Parser.Cursor,capture_t());
1717 else
1718 return NewTarget.OnFinish(Parser);
1719 }
// Case 2: no clause and no required start token, so the pending calls become ordinary calls.
1720 else if(!StartToken) {
1721 // Not a macro, and a macro isn't required, so flush accumulated call and specifiers
1722 // to the nearest outer prec::Call, needed for if{a}else if{b}<c> associating as (if{a}else if{b})<c>.
1723 if (!this->OuterExpr) { return nothing{}; } // This should never occur - though without it some C++ semantic analysis checkers get upset when passing to FinishExpr() below.
1724 GRAMMAR_LET(InsertCall,Parser.FinishExpr(token::None(),prec::Call,*this->OuterExpr));
1725 if(!InsertCall)
1726 return Parser.S61(FirstCall? FirstCall->CallWhat: u8"macro end");
// Emit one Gen.Call per pending call, in chain order, each wrapping the syntax built so far.
1727 for(auto Call=FirstCall; Call; Call=Call->OuterCall) {
1728 Call->CallParameter.BlockSnippet=Snip(point::Start(Call->CallParameter.BlockSnippet),*Call->CallParameter.BlockTrailing.TrailingStart);
1729 GRAMMAR_RUN(Parser.UpdateFrom(*InsertCall,Call->CallParameter.BlockTrailing,Parser.Gen.Call(
1730 Snip(InsertCall->Start,point::Stop(Call->CallParameter.BlockSnippet)),Call->CallMode,
1731 Parser.ApplyTrailing(*InsertCall),Call->CallParameter)));
1732 }
1733 return nothing{};
1734 }
// Case 3: a start token was given but no clause followed.
1735 else return Parser.S76(What); // Error for reserved word not followed by macro.
1736 }
1737 };
1738 result_t<nothing> InvokeClause(invoke& Target,nat WhichClause,cursor BlockStart,block_t& Block0,cursor NextBlockStart,const capture_t& NextTokenLeading=capture_t()) {
1739 // We've committed to producing a macro invocation, so accumulate specifiers m<a> and handle any prior m(a).catch up to clauses from call m(c).
1740 auto Specifiers = syntaxes_t{};
1741 const snippet* FirstSpecifier = nullptr;
1742 while(auto Call=Target.FirstCall) {
1743 Target.FirstCall=Target.FirstCall->OuterCall;
1744 ApplyTrailing(Call->CallParameter,Call->CallTrailingStop);
1745 if(Call->CallMode==mode::Open) {
1746 GRAMMAR_ASSERT(!Target.Clauses[0] && !Target.Clauses[1] && !Target.Clauses[2]);
1747 if(FirstSpecifier)
1748 Call->CallParameter.BlockSnippet = Snip(point::Start(*FirstSpecifier),point::Stop(Call->CallParameter.BlockSnippet));
1749 Call->CallParameter.Specifiers = Specifiers;
1750 Target.Clauses[0] = &Call->CallParameter;
1751 Target.Of = nullptr;
1752 return InvokeClause(Target,WhichClause,BlockStart,Block0,NextBlockStart,NextTokenLeading);
1753 }
1754 else if(Call->CallMode==mode::With) {
1755 if(!Gen.SyntaxesLength(Specifiers))
1756 FirstSpecifier = &Call->CallParameter.BlockSnippet;
1757 GRAMMAR_LET(E,Gen.Parenthesis(Call->CallParameter));
1758 Gen.SyntaxesAppend(Specifiers,E);
1759 }
1760 else Err();
1761 }
1762 if(Target.PriorClause)
1763 ApplyTrailing(*Target.PriorClause,FirstSpecifier? point::Start(*FirstSpecifier): BlockStart);
1764 if(FirstSpecifier)
1765 Block0.BlockSnippet = Snip(point::Start(*FirstSpecifier),point::Stop(Block0.BlockSnippet));
1766 Block0.Specifiers = Specifiers;
1767 Target.LastCall = nullptr; // Catch up so subsequent accumulation works.
1768 Target.Clauses[WhichClause] = &Block0;
1769 Target.PriorClause = Block0.BlockSnippet? &Block0: Target.PriorClause;
1770 if(!Target.ExprStop)
1771 return Invoke(Target,NextBlockStart,NextTokenLeading);
1772 else
1773 return Target.OnFinish(*this);
1774 };
// Parses the postfix pieces that may follow Target — parenthesized arguments, specifiers, blocks,
// the do/then/until/catch/else keywords, and markup terminators — dispatching on the token at the cursor.
//   Target       - invocation state being accumulated; finished via Target.OnFinish when the
//                  current token is not in Target.AllowPostfixes.
//   BlockStart   - cursor used as the start of the next clause's snippet.
//   TokenLeading - capture associated with the current token; passed to ScanKey in the NewLine
//                  case and forwarded to the clause builders.
// NOTE(review): CallToken and SavedContext are used below but not declared in the visible lines; the
// listing's embedded numbering skips lines at those points, so their declarations are likely missing here.
1775 result_t<nothing> Invoke(invoke& Target,cursor BlockStart,capture_t TokenLeading=capture_t()) {
1776 // Markup := '<' Scan Tags Scan ":>" Space Ind Contents Ded
1777 // | '<' Scan Tags Scan ';' Scan Contents '>'
1778 // | '<' Scan Tags Scan '>' Scan Contents '</' Ident Space {'/' Ident Space} '>'
1779 // Tags := Space (!'/' Call ScanKey '.' | !Reserved) QualIdent Space {Invoke} [',' Scan Tags]
1780 // Postfix := .. | !Invoke (Paren | Specs) | ..
1781 // Invoke := [Specs] (Paren [Specs] (Block | Do ) | Block [[Specs] Do ]) (Until | !Until)
1782 // If := "if" Key [Specs] (Paren (Block | Then) | Block [ Then]) (Else | !Else )
1783 GRAMMAR_ASSERT(CheckToken());
1784 auto PostfixStart = Cursor;
1785 auto PostfixToken = PostfixStart.Token;
// A token outside the allowed set ends the invocation.
1786 if(!Target.AllowPostfixes.Has(PostfixToken))
1787 return Target.OnFinish(*this);
1788
1789 // Definitely starting a new potential clause.
1790 switch(nat8(PostfixToken)) {
1791 case token(u8"("): {
1792 // Paren := '(' List ')' Space
1793 EatToken();
1794 GRAMMAR_LET(Block0,List(u8")",&parser::S82,Cursor,capture_t(),punctuation::Parens,Cursor));
1795 if(Eat(u8":)")) {
1796 // If we're in an attribute like @a (b:)c, move the QualIdent to the Base handler for '@'.
1797 GRAMMAR_LET(InsertExpr,FinishExpr(token::None(),prec::Prefix,Target));
1798 if(!InsertExpr || !InsertExpr->QualIdentTarget)
1799 return parser::S82(u8":)");
1800 InsertExpr->QualIdentTarget->Start=PostfixStart;
1801 GRAMMAR_LET(Id,QualIdentQualified(*InsertExpr,PostfixStart,Block0));
1802 GRAMMAR_RUN(UpdateSpaceTrailing(*InsertExpr->QualIdentTarget,Id));
1803 return nothing{};
1804 }
1805 GRAMMAR_RUN(RequireClose(PostfixStart,u8"(",u8")",&parser::S82));
1806 Block0.BlockSnippet = Snip(BlockStart);
// NewCall lives on this stack frame; Target keeps a pointer to it, and the recursive Invoke below
// runs while the frame is still alive.
1807 auto NewCall = call{u8"(",Cursor,mode::Open,Block0};
1808 GRAMMAR_RUN(SpaceTrailing(NewCall.CallParameter.BlockTrailing));
1809 NewCall.CallTrailingStop = Cursor;
1810 Target.UpdateLastCall(&NewCall);
1811 Target.Of = &NewCall;
1812 Target.AllowPostfixes = (Target.AllowPostfixes & ~ParenPostfixes) | Target.InTokens;
1813 if(Target.StartToken==token(u8"if")) // Disallow if(a)<b>{c} to enable future if(a) b.
1814 Target.AllowPostfixes = Target.AllowPostfixes & ~WithPostfixes;
1815 return Invoke(Target,Cursor);
1816 }
1817 case token(u8"<"): case token(u8"with"): {
1818 // Specs := [ScanKey "with" Key] '<' Scan Choose Space '>' Space (Specs | !Specs)
1820 capture_t PunctuationLeading;
1821 if(PostfixToken==token(u8"with")) {
1822 EatToken();
1823 CallToken=u8"with";
1824 GRAMMAR_RUN(Space(PunctuationLeading));
1825 if(Cursor.Token!=token(u8"<"))
1826 return S78();
1827 }
1828 EatToken();
1829 GRAMMAR_LET(Leading,Space());
1830 // We parse specifier at prec::Choose, but FinishExpr at prec::Less to right-associate nested '<'.
1831 // LessExpr receives TrailingCapture so specifiers can handle it and less-than can propagate it.
1832 // If we parsed a<b<c at just prec::Choose, the inner FinishExpr forces the finishes prec::Choose,
1833 // whose FinishExpr forces the outer prec::Choose, so the outer Postfix incorrectly parses first.
1834 // This is as simple as it can be; other approaches add bloat.
1835 bool GotLess = false;
1836 auto LessExpr = when_expr(prec::Less,&Target,AllowLess,Cursor,Leading,[&](expr& LessExpr)->result_t<nothing> {
1837 // We get here only if we parse a Less expression a<b, not if we parse a specifier.
1839 auto& InsertExpr = *LessExpr.OuterExpr; // Dynamic, not necessarily Target.
1840 return UpdateFrom(InsertExpr,LessExpr.Trailing,Gen.InfixToken(
1841 SnipFinished(InsertExpr.Start,LessExpr),PostfixToken->PostfixMode,
1842 ApplyTrailing(InsertExpr),PostfixToken->Symbol,*LessExpr
1843 ));
1844 });
1845 return WhenExpr(u8"<",prec::Choose,prec::Less,&LessExpr,capture_t(),[&](expr& RightExpr)->result_t<nothing> {
1846 GRAMMAR_RUN(UpdateFrom(LessExpr,RightExpr.Trailing,*RightExpr));
1847 if(Eat(u8">")) {
1848 // Parsed a specifier. Abandon LessExpr.
1849 auto RightSyntax = Gen.Leading(Leading,ApplyTrailing(LessExpr));
1850 auto SpecifierBlock = SingletonBlock(Snip(BlockStart,Cursor),RightSyntax,PunctuationLeading,punctuation::AngleBrackets);
1851 SpecifierBlock.Token = CallToken;
1852 SpecifierBlock.TokenLeading = TokenLeading;
1853 auto NewCall = call{u8"<",Cursor,mode::With,SpecifierBlock};
1854 GRAMMAR_RUN(SpaceTrailing(NewCall.CallParameter.BlockTrailing));
1855 NewCall.CallTrailingStop = Cursor;
1856 Target.UpdateLastCall(&NewCall);
1857 return Invoke(Target,Cursor);
1858 }
1859 else if(PostfixToken!=token(u8"with")) {
1860 // We parsed a Less expression a<b so figure out where it lands and finish parsing it.
1861 GotLess=true;
1862 GRAMMAR_SET(LessExpr.OuterExpr,FinishExpr(token(u8"<"),prec::Less,Target));
1863 if(!LessExpr.OuterExpr)
1864 return S61(u8"<");
1865 return Postfix(u8"<",prec::Less,LessExpr); // Trigger's LessExpr's when_expr.
1866 }
1867 else return S79();
1868 });
1869 }
1870 case token(u8"{"): case token(u8"."): case token(u8":"): case token(u8"in"): {
1871 // Block := Brace | DotSpace Space Def Space | (DotSpace | ':') Space Ind List Ded
1872 bool Fails=false;
1873 GRAMMAR_LET(Block0,Block(u8"macro invocation",Target,BlockStart,TokenLeading,Fails));
1874 if(!Fails) {
1875 Target.AllowPostfixes = (Target.AllowPostfixes & ~ParenPostfixes & ~BlockPostfixes) | Target.InTokens | Target.PostTokens;
1876 if(Target.Of)
1877 Target.AllowPostfixes = Target.AllowPostfixes & ~Target.InTokens;
1878 if(Target.StartToken==token(u8"if")) // Disallow if{a}<b>.. so else-if never finishes before last InvokedClause.
1879 Target.AllowPostfixes = Target.AllowPostfixes & ~WithPostfixes;
// The block is clause 1 when a parenthesized call precedes it (Target.Of set), otherwise clause 0.
1880 return InvokeClause(Target,Target.Of!=0,BlockStart,Block0,Cursor);
1881 }
1882 return Target.OnFinish(*this); // For In, '.' QualIdent.
1883 }
1884 case token(u8"do"): case token(u8"then"): {
1885 // Do := ScanKey "do" Key (KeyBlock | Def)
1886 // Then := ScanKey "then" Key (KeyBlock | Def)
1887 EatToken();
1888 GRAMMAR_LET(PunctuationLeading,Space());
1889 GRAMMAR_LET(Block0,KeyBlock(prec::Def,Target,BlockStart,TokenLeading,PostfixToken->Symbol,PunctuationLeading));
1890 Target.AllowPostfixes = (Target.AllowPostfixes & ~Target.InTokens) | Target.PostTokens;
1891 return InvokeClause(Target,1,BlockStart,Block0,Cursor);
1892 }
1893 case token(u8"until"): {
1894 // Until := ScanKey "until" Key (KeyBlock | Def) | ..
1895 EatToken();
1896 GRAMMAR_LET(PunctuationLeading,Space());
1897 GRAMMAR_LET(Block0,KeyBlock(prec::Def,Target,BlockStart,TokenLeading,PostfixToken->Symbol,PunctuationLeading));
1898 Target.AllowPostfixes = token_set{};
1899 return InvokeClause(Target,2,BlockStart,Block0,Cursor);
1900 }
1901 case token(u8"catch"): {
1902 // Until := .. | ScanKey "catch" Key Invoke
1903 // Chain more catches only if !Target.FirstCall. Update AllowTokens to reenable catch.
1904 EatToken();
1905 auto CatchExpr = when_expr(prec::Base,&Target,AllTokens,BlockStart,TokenLeading,[&](expr& CatchExpr)->result_t<nothing> {
1906 auto Block0=SingletonBlock(BlockStart,CatchExpr);
1907 return InvokeClause(Target,2,BlockStart,Block0,*CatchExpr.Finished);
1908 });
1909 GRAMMAR_RUN(UpdateSpaceTrailing(CatchExpr,Gen.Native(Snip(BlockStart),u8"catch")));
1910 invoke CatchTarget{u8"catch",CatchExpr,BlockStart,token(u8"catch"),token_set{u8"do"},token_set{u8"until",u8"catch"}};
1911 GRAMMAR_RUN(Invoke(CatchTarget,Cursor));
// NOTE(review): the result of OnFinish is discarded here, so an error it returns would be dropped —
// confirm this is intentional.
1912 if(!CatchExpr.Finished)
1913 CatchExpr.OnFinish(*this);
1914 return nothing();
1915 }
1916 case token(u8"else"): {
1917 // Else := ScanKey "else" Key (ScanKey If | !(ScanKey If) (KeyBlock | Def))
1918 EatToken();
1919 GRAMMAR_LET(PunctuationLeading,Space());
1920 Target.AllowPostfixes = token_set{};
1921 if(Cursor.Token==token(u8"if")) {
1922 // Grammar makes "else if" a special case so "if(a){b}else if(c){d}+1"
1923 // is equivalent to "(if(a){b}else if(c){d})+1", not "if(a){b}else (if(c){d}+1)".
1924 return WhenExpr(u8"else if",prec::Base,prec::Base,&Target,PunctuationLeading,[&](expr& ElseExpr)->result_t<nothing> {
1925 Target.ExprStop = ElseExpr.ExprStop;
1926 auto ElseBlock = SingletonBlock(BlockStart,ElseExpr);
1927 ElseBlock.Token = PostfixToken->Symbol;
1928 ElseBlock.TokenLeading = TokenLeading;
1929 return InvokeClause(Target,2,BlockStart,ElseBlock,Cursor);
1930 });
1931 }
1932 else {
1933 GRAMMAR_LET(ElseBlock,KeyBlock(prec::Def,Target,BlockStart,TokenLeading,PostfixToken->Symbol,PunctuationLeading));
1934 return InvokeClause(Target,2,BlockStart,ElseBlock,Cursor);
1935 }
1936 }
1937 case token(u8","): case token(u8";"): case token(u8">"): case token(u8":>"): {
1938 if(!Target.Clauses[0] || Target.FirstCall) {
1939 // If we have <m;c>, <m(a);c>, <m<a>;c>, <m{a}<b>;c>, introduce a new block and recurse back.
1940 block_t Block0{Snip()};
1941 return InvokeClause(Target,Target.FirstCall!=nullptr,BlockStart,Block0,BlockStart,TokenLeading);
1942 }
1943 EatToken();
// These terminators are only meaningful when the outer expression carries a markup tag.
1944 if(!Target.OuterExpr->MarkupTag)
1945 return S40();
1946 if(Target.PriorClause)
1947 ApplyTrailing(*Target.PriorClause,BlockStart); // TODO: Fix, as this is bad for <m(a)\n ;a>.
1948 Target.OuterExpr->MarkupFinished=true;
1949 capture_t PreContent,PostContent;
// Second dispatch on the same token selects the markup content form.
1950 switch(nat8(PostfixToken)) {
1951 case token(u8","): {
1952 GRAMMAR_LET(InnerContent,MarkupExpr(Target.OuterExpr,PostfixStart));
1953 return InvokeMarkup(Target,TokenLeading,capture_t(),InnerContent,capture_t());
1954 }
1955 case token(u8";"): {
1956 Gen.MarkupStart(PreContent,Snip(PostfixStart));
1957 GRAMMAR_LET(Content,Trimmed(false));
1958 cursor ContentsEnd=Cursor;
1959 GRAMMAR_RUN(Require(u8">",&parser::S51));
1960 Gen.MarkupStop(PostContent,Snip(ContentsEnd));
1961 return InvokeMarkup(Target,TokenLeading,PreContent,Content,PostContent);
1962 }
1963 case token(u8">"): {
1964 Gen.MarkupStart(PreContent,Snip(PostfixStart));
1965 GRAMMAR_LET(Content,Trimmed(true));
1966 cursor PostStart=Cursor;
1967 GRAMMAR_RUN(Require(u8"<",&parser::S52));
1968 Gen.MarkupStart(PostContent,Snip(PostStart));
// Each enclosing markup level (followed through OuterMarkup) must be closed by a matching "/Tag".
1969 for(auto* ExpectMarkup=Target.OuterExpr; ExpectMarkup; ExpectMarkup=ExpectMarkup->OuterMarkup) {
1970 GRAMMAR_RUN(Require(u8"/",&parser::S44));
1971 if(!IsAlpha(Cursor[0]))
1972 return S44(ExpectMarkup->MarkupTag);
1973 auto TagStart=Cursor;
1974 GRAMMAR_LET(EndTag,Ident());
1975 if(EndTag!=ExpectMarkup->MarkupTag)
1976 return S43(ExpectMarkup->MarkupTag,EndTag);
1977 auto TagSnippet=Snip(TagStart);
1978 Gen.MarkupTag(PostContent,TagSnippet);
1980 }
1981 cursor PostEnd=Cursor;
1982 GRAMMAR_RUN(Require(u8">",&parser::S44));
1983 Gen.MarkupStop(PostContent,Snip(PostEnd));
1984 return InvokeMarkup(Target,TokenLeading,PreContent,Content,PostContent);
1985 }
1986 case token(u8":>"): {
1987 Gen.MarkupStart(PreContent,Snip(PostfixStart));
1989 if(!Ending())
1990 return S46();
1992 GRAMMAR_LET(Content,Contents(true));
1993 GRAMMAR_RUN(Ded(SavedContext,&parser::S54));
1995 return InvokeMarkup(Target,TokenLeading,PreContent,Content,PostContent);
1996 }
1997 default: {
1998 break;
1999 }}
2000 [[fallthrough]];
2001 }
2002 case token::NewLine(): {
// Scan ahead for one of the listed continuation tokens that is still allowed; if found, parse it
// as the next postfix, otherwise finish.
2003 GRAMMAR_LET(ScanToken,ScanKey(TokenLeading,
2004 Target.AllowPostfixes&token_set{u8"catch",u8"do",u8"else",u8"then",u8"until",u8"with",u8"{",u8">",u8":>",u8",",u8";"}));
2005 if(ScanToken)
2006 return Invoke(Target,BlockStart,TokenLeading);
2007 return Target.OnFinish(*this);
2008 }
2009 default: // Ensure static analysis happy with all permutations covered
2010 break;
2011 }
2012 Err(); // AllowPostfixes makes this unreachable.
2013 }
2014
2015 // Markup.
// Builds the markup-invocation syntax for the expression that owns InvokeTarget and finishes that expression.
//   InvokeTarget - invocation whose OuterExpr is the markup expression; its first two clauses are passed on.
//   TokenLeading - capture forwarded to Gen.InvokeMarkup.
//   PreContent, Content, PostContent - markup delimiters and body supplied by the caller.
// NOTE(review): the listing's embedded numbering skips a line inside the Gen.InvokeMarkup argument list,
// which is presumably where PreContent, Content and PostContent are passed — confirm against the original header.
2016 result_t<nothing> InvokeMarkup(invoke& InvokeTarget,const capture_t& TokenLeading,const capture_t& PreContent,syntax_t& Content,const capture_t& PostContent) {
2017 auto& MarkupExpr = *InvokeTarget.OuterExpr;
// The generated syntax carries no trailing capture; trailing starts at the current cursor.
2018 auto NoTrailing = trailing{Cursor, capture_t()};
2019 GRAMMAR_RUN(UpdateFrom(MarkupExpr,NoTrailing,Gen.InvokeMarkup(
2020 Snip(*MarkupExpr.MarkupStart),
// "<" when the expression has no OuterMarkup, "," otherwise.
2021 !MarkupExpr.OuterMarkup? u8"<": u8",",
2022 MarkupExpr.ExprLeading,
2023 ApplyTrailing(MarkupExpr),
2024 InvokeTarget.Clauses[0],InvokeTarget.Clauses[1],
2025 TokenLeading,
2027 ));
// ExprLeading was handed to the generator above, so reset it before finishing.
2028 MarkupExpr.ExprLeading = capture_t();
2029 return MarkupExpr.OnFinish(*this);
2030 }
2031 result_t<syntax_t> Markup() {
2032 GRAMMAR_ASSERT(Cursor[0]=='<');
2033 auto Start=Cursor;
2034 Next(1);
2035 return MarkupExpr(nullptr,Start);
2036 }
2037
2038 // Expressions.
// One pending "in"/':' prefix recorded by InChoose while recursing; brace-initialized positionally.
//   Start       - cursor passed to SnipFinished for the resulting prefix expression.
//   InToken     - the prefix token that was consumed.
//   NextStart   - cursor immediately after the prefix token.
//   NextLeading - capture returned by Space() after the prefix token.
//   NextIns     - the previously recorded prefix (nullptr for the first one).
2039 struct ins {cursor Start; token InToken; cursor NextStart; capture_t NextLeading; const ins* NextIns;};
2040 result_t<nothing> InChoose(expr& PostfixExpr,cursor Start,const ins* Ins=nullptr) {
2041 // In := ("in" Key | ':') Space (In | NotEq)
2042
2043 scoped_guard ExprDepthGuard(ExprDepth, ExprDepth + 1);
2044 if (ExprDepth > VERSE_MAX_EXPR_DEPTH)
2045 return S99();
2046
2047 // Here, we parse the Choose into PostfixExpr without finishing it.
2048 auto InToken = Cursor.Token;
2049 if(InPrefixes.Has(Cursor.Token)) {
2050 EatToken();
2051 auto NextStart=Cursor;
2052 GRAMMAR_LET(NextLeading,Space());
2053 auto NextIn=ins{Start,InToken,NextStart,NextLeading,Ins};
2054 return InChoose(PostfixExpr,Cursor,&NextIn);
2055 }
2056 GRAMMAR_RUN(WhenExpr(InToken->Symbol,prec::Choose,prec::Choose,&PostfixExpr,capture_t(),[&](expr& Right)->result_t<nothing> {
2057 auto NewRight=*Right;
2058 for(; Ins; Ins=Ins->NextIns) {
2059 auto RightBlock=SingletonBlock(SnipFinished(Ins->NextStart,Right),Gen.Leading(Ins->NextLeading,NewRight));
2060 GRAMMAR_SET(NewRight,Gen.PrefixToken(
2061 SnipFinished(Ins->Start,Right),Ins->InToken->PrefixMode,Ins->InToken->Symbol,
2062 RightBlock,false));
2063 }
2064 return UpdateFrom(PostfixExpr,Right.Trailing,NewRight);
2065 }));
2066 return nothing{};
2067 }
2068 result_t<nothing> DefPostfix(expr& Target) {
2069 // Def := (.. | .. Space (('='|':='|'+='|'*='|'/=') Space (BraceInd | Def) | !'=' !':=')) {&In Def | ..}
2070 auto DefineToken=Cursor.Token;
2071 if(DefPostfixes.Has(DefineToken)) {
2072 EatToken();
2073 GRAMMAR_LET(Right,BraceInd(DefineToken->Symbol,prec::Def,Target));
2074 GRAMMAR_RUN(UpdateFrom(Target,Right.BlockTrailing,Gen.InfixBlock(
2075 SnipFinished(Target.Start,Right),
2076 ApplyTrailing(Target),DefineToken->Symbol,Right)));
2077 }
2078 return nothing{};
2079 }
// Parses a base (prefix-position) expression into Target, dispatching on the token at the cursor.
//   What         - description forwarded to QualIdent and to the error callbacks.
//   Prec         - the token is handled only when Prec <= its PrefixPrec.
//   Target       - expression that receives the parsed syntax.
//   OnTokenError - invoked with What when the token's PrefixPrec is prec::Never.
//   OnPrecError  - invoked with What and the token's symbol when the token is a prefix at a lower precedence.
// NOTE(review): Capture, AttrLeading and StartingSpace are used below but not declared in the visible
// lines, and the '@' case has gaps; the listing's embedded numbering skips lines at those points, so
// parts of this function are likely missing here.
2080 result_t<nothing> Base(text What,prec Prec,expr& Target,error_t(parser::*OnTokenError)(text),error_t(parser::*OnPrecError)(text,text)) {
2081 // Base := '(' List ')' | Num | Char | Path | String | Markup | If | !Reserved QualIdent
2082 GRAMMAR_ASSERT(CheckToken());
2083 auto BaseToken = Cursor.Token;
2084 if(Prec<=BaseToken->PrefixPrec) {
2085 switch(nat8(Cursor.Token)) {
2086 case token::Digit(): {
// "0o" followed by a hex digit is parsed as a Char8 and "0u" followed by a hex digit as a Char32; anything else is a number.
2087 if(Cursor[0]=='0' && Cursor[1]=='o' && IsHex(Cursor[2])) {
2088 GRAMMAR_LET(c,Char8());
2089 return UpdateSpaceTrailing(Target,Gen.Char8(Snip(Target.Start),c));
2090 }
2091 else if(Cursor[0]=='0' && Cursor[1]=='u' && IsHex(Cursor[2])) {
2092 GRAMMAR_LET(c,Char32());
2093 return UpdateSpaceTrailing(Target,Gen.Char32(Snip(Target.Start),c,true,false));
2094 }
2095 else return UpdateSpaceTrailing(Target,Num());
2096 }
2097 case token(u8"\""): {
2098 // String := '"' {Interp | CharEsc | !('\'|'{'|'}'|'"') Text} '"'
2099 Next(1);
2101 GRAMMAR_RUN(Require(u8"\"",&parser::S32));
2102 return UpdateSpaceTrailing(Target,Gen.String(Snip(Target.Start),Capture));
2103 }
2104 case token(u8"'"): {
2105 // CharLit := ''' Printable ''' !''' | ''' CharEsc '''
2106 return UpdateSpaceTrailing(Target,CharLit());
2107 }
2108 case token::Alpha(): // Ident.
2109 case token(u8"("): // QualIdent or Paren.
2110 case token(u8"at"): case token(u8"of"): // Infix operator tokens that are allowed as identifiers.
2111 case token(u8"to"):
2112 case token(u8"next"): case token(u8"over"): case token(u8"when"): case token(u8"while"):
2113 case token(u8"and"): case token(u8"or"): {
2114 // Ident := Alpha {Alnum} !Alnum ["'" {!('<#'|'#>'|'\'|'{'|'}'|'"'|''') 0o20-0o7E} "'"]
2115 // QualIdent := ['(' List ':)' Space] Ident
2116 // Base := '(' List ')' | ..
2117 // Postfix-only non-ScanKey keywords are valid identifiers.
2118 return UpdateSpaceTrailing(Target,QualIdent(What,Target,true));
2119 }
2120 case token(u8"@"): {
2121 // Expr := .. | '@' Space Call Scan &('@'|QualIdent) Expr
2122 EatToken();
2124
2125 // Set up for parsing RightExpr later. It may receive QualIdent early, e.g. in @a (b:)c...
2126 auto RightExpr = when_expr(prec::Expr,&Target,AllTokens,Cursor,capture_t(),[&](expr& RightExpr) {
2127 return UpdateFrom(Target,RightExpr.Trailing,Gen.PrefixAttribute(
2128 SnipFinished(Target.Start,RightExpr),
2130 });
2131
2132 // Parse attribute, with RightExpr as its QualIdent target for @a (b:)c.
2134 GRAMMAR_RUN(WhenExpr(u8"@",prec::Call,prec::Prefix,nullptr,AttrLeading,[&](expr& AttrExpr)->result_t<nothing> {
2135 ApplyTrailing(AttrExpr,true);
2137 return nothing{};
2138 },AllTokens,&parser::S71,&RightExpr));
2139
2140 // Parse all or the remainder of RightExpr.
2141 if(!RightExpr.ExprSyntax) {
2142 GRAMMAR_RUN(Scan(RightExpr.ExprLeading));
2143 RightExpr.Start=Cursor;
// Only another attribute, a parenthesis, or an alphanumeric character may begin the attributed expression.
2144 if(Cursor[0]!='@' && Cursor[0]!='(' && !IsAlnum(Cursor[0]))
2145 return S67();
2146 GRAMMAR_RUN(Base(What,Prec,RightExpr,&parser::S71,&parser::S60));
2147 }
2148 GRAMMAR_RUN(Postfix(What,Prec,RightExpr,&parser::S71,&parser::S60));
2149 return RightExpr.Result;
2150 }
2151 case token(u8"<"): {
2152 // Base = .. | Markup | ..
2153 return UpdateSpaceTrailing(Target,Markup());
2154 }
2155 case token(u8"/"): {
2156 // Base = (.. | Path | ..) Space
2157 GRAMMAR_LET(P,Path());
2158 return UpdateSpaceTrailing(Target,Gen.Path(Snip(Target.Start),P));
2159 }
2160 case token(u8":"): case token(u8"in"): {
2161 // In := ("in" Key | ':') Space (In | NotEq)
2162 // Def := (Or | (In|Var) Space (('='|':='|'+='|'*='|'/=') Space (BraceInd | Def) | !'=' !':=')) {&In Def | ..}
2163
2164 // Postfix definition x:t leads here, so capture any :t<v, keeping x:t definition at the top.
2165 auto PostfixExpr = when_expr(prec::Def,&Target,AllTokens,Cursor,capture_t(),[&](expr& PostfixExpr)->result_t<nothing> {
2166 // This runs when In or Postfix below finishes PostfixExpr.
2167 GRAMMAR_RUN(UpdateFrom(Target,PostfixExpr.Trailing,*PostfixExpr));
2168 return DefPostfix(Target);
2169 },nullptr);
2170
2171 // In parses Choose into PostfixExpr, then Postfix extends to NotEq.
2172 GRAMMAR_RUN(InChoose(PostfixExpr,Target.Start));
2174 return nothing{};
2175 }
2176 case token(u8"var"): case token(u8"set"): case token(u8"ref"): case token(u8"alias"): case token(u8"live"): {
2177 // Var := (("var" [Space '<' Space Choose Space '>'] [Space "live"])|("set" [Space "live"])|"ref"|"alias"|"live") Key Space Choose
2178 // Def := (Or | (In|Var) Space (('='|':='|'+='|'*='|'/=') Space (BraceInd | Def) | !'=' !':=')) {&In Def | ..}
2179 bool bIsVar = Cursor.Token == token(u8"var");
2180 bool bIsSet = Cursor.Token == token(u8"set");
2181 bool bLive = Cursor.Token == token(u8"live");
2182 EatToken();
2183 syntaxes_t Attributes;
2184#if !TIM /* This syntax will evolve from var<specifier> x:t=v to x:t<specifier>=v. */
2185 if (bIsVar)
2186 {
// Collect zero or more "<specifier>" groups following "var" into Attributes.
2187 while (true)
2188 {
2190 if (Cursor.Token != token(u8"<"))
2191 {
2192 break;
2193 }
2194
2195 EatToken();
2196 GRAMMAR_RUN(Space());
2197 GRAMMAR_RUN(WhenExpr(u8"<",prec::Choose,prec::Less,&Target,StartingSpace,[&](expr& Expr)->result_t<nothing> {
2198 ApplyTrailing(Expr,true);
2199 Gen.SyntaxesAppend(Attributes,*Expr);
2200 return nothing{};
2201 }));
2202 GRAMMAR_RUN(Space());
2203 GRAMMAR_RUN(RequireClose(Cursor,u8"<",u8">",&parser::S85));
2204 }
2205 }
2206#endif
2207 auto ChooseStart = Cursor;
2208 GRAMMAR_LET(Middle, Space());
2209 if (bIsVar || bIsSet)
2210 {
// "var ... live" and "set live": consume the optional "live" keyword and restart the operand after it.
2211 if (Cursor.Token == token(u8"live"))
2212 {
2213 EatToken();
2214 bLive = true;
2215 ChooseStart = Cursor;
2216 GRAMMAR_RUN(Space());
2217 }
2218 }
2219 return WhenExpr(BaseToken->Symbol,prec::Choose,prec::Choose,&Target,Middle,[&](expr& Choose)->result_t<nothing> {
2220 auto ChooseBlock=SingletonBlock(ChooseStart,Choose);
2221 GRAMMAR_RUN(UpdateFrom(Target,ChooseBlock.BlockTrailing,Gen.PrefixToken(
2222 SnipFinished(Target.Start,Choose),
2223 BaseToken->PrefixMode,BaseToken->Symbol,
2224 ChooseBlock, false, Attributes, bLive)));
2225 if(DefPostfixes.Has(Cursor.Token)) // Translate "set x=3" to "set{x}:=3".
2226 return DefPostfix(Target);
2227 return nothing{};
2228 });
2229 }
2230 case token(u8".."): case token(u8"not"): /*case token("!"):*/ {
2231 // Not := .. | ("not" Key) Space Not
2232 // Def := .. | (.. | '..') Space Def
2233 EatToken();
2234 auto RightStart=Cursor;
2235 GRAMMAR_LET(Middle,Space());
2236 return WhenExpr(BaseToken->Symbol,BaseToken->PrefixPrec,BaseToken->PrefixPrec,&Target,Middle,[&](expr& RightExpr) {
2237 auto RightBlock=SingletonBlock(RightStart,RightExpr);
2238 return UpdateFrom(Target,RightBlock.BlockTrailing,Gen.PrefixToken(
2239 SnipFinished(Target.Start,RightExpr),
2240 BaseToken->PrefixMode,BaseToken->Symbol,
2241 RightBlock,false));
2242 });
2243 }
2244 case token(u8"&"): {
2245 // Def := .. | ('&' | ..) Space Def
2246 EatToken();
2247 GRAMMAR_LET(Middle,Space());
2248 return WhenExpr(u8"&",BaseToken->PrefixPrec,BaseToken->PrefixPrec,&Target,Middle,[&](expr& Right) {
2249 return UpdateFrom(Target,Right.Trailing,Gen.Escape(
2250 SnipFinished(Target.Start,Right),*Right));
2251 });
2252 }
2253 case token(u8"^"): case token(u8"?"): case token(u8"+"): case token(u8"-"): case token(u8"*"): {
2254 // Prefix := .. | ('^' | '?' | .. | '+' | '-' | '*') Space (Brace | Prefix)
2255 EatToken();
2256 return WhenBraceCall(BaseToken->Symbol,BaseToken->PrefixPrec,Target,[&](block_t& RightBlock)->result_t<nothing> {
2257 return UpdateFrom(Target,RightBlock.BlockTrailing,Gen.PrefixToken(
2258 SnipFinished(Target.Start,RightBlock),
2259 BaseToken->PrefixMode,BaseToken->Symbol,
2260 RightBlock,RightBlock.Punctuation==punctuation::Braces));
2261 });
2262 }
2263 case token(u8"["): {
2264 // Prefix := .. | (.. | '[' List ']' | ..) Space (Brace | Prefix)
2265 EatToken();
2266 GRAMMAR_LET(Left,List(u8"]",&parser::S85,Cursor,capture_t(),punctuation::None,Cursor));
2267 GRAMMAR_RUN(RequireClose(Target.Start,u8"[",u8"]",&parser::S85));
2268 return WhenBraceCall(u8"[]",BaseToken->PrefixPrec,Target,[&](block_t& Right)->result_t<nothing> {
2269 return UpdateFrom(Target,Right.BlockTrailing,Gen.PrefixBrackets(
2270 SnipFinished(Target.Start,Right),
2271 Left,Right));
2272 });
2273 }
2274 case token(u8"if"): {
2275 // If := "if" Key [Specs] (Paren (Block | Then) | Block [Then]) (Else | !Else)
2276 Target.MarkupTag=u8"if"; // We propagate markup to support <if(a)>Hello</if>.
2277 EatToken();
2278 GRAMMAR_RUN(UpdateSpaceTrailing(Target,Gen.Native(Snip(Target.Start),u8"if")));
// "if" is parsed as an invocation whose in-token is "then" and whose post-token is "else".
2279 invoke IfTarget{u8"if",Target,Target.Start,token(u8"if"),token_set{u8"then"},token_set{u8"else"}};
2280 return Invoke(IfTarget,Cursor);
2281 }
2282 case token(u8"return"): case token(u8"yield"): case token(u8"break"): case token(u8"continue"): {
2283 // Def := .. | Return [KeyBlock|Def] StopDef
2284 EatToken();
2285 block_t Right;
2286 Right.BlockTrailing.TrailingStart=Cursor;
2287 GRAMMAR_SET(Right.BlockTrailing.TrailingCapture,Space());
// The operand is optional: it is parsed only when the next token is not in StopDef.
2288 if(!StopDef.Has(Cursor.Token))
2289 GRAMMAR_SET(Right,KeyBlock(prec::Def,Target,*Right.BlockTrailing.TrailingStart,capture_t(),u8"",Right.BlockTrailing.TrailingCapture));
2290 return UpdateFrom(Target,Right.BlockTrailing,Gen.PrefixToken(
2291 SnipFinished(Target.Start,Right),
2292 BaseToken->PrefixMode,BaseToken->Symbol,
2293 Right,false));
2294 }
2295 case token(u8"!"):
2296 return S62();
2297 default: { // Static analysis warnings without default
2298 break;
2299 }}
2300 Err(); // Should never occur due to structure of the precedence table.
2301 }
// The token was not accepted at this precedence: report either "not a prefix token at all" or
// "prefix token at too low a precedence".
2302 if(BaseToken->PrefixPrec==prec::Never)
2303 return (this->*OnTokenError)(What);
2304 else
2305 return (this->*OnPrecError)(What,BaseToken->Symbol);
2306 }
2307 result_t<nothing> Postfix(text /*What*/,prec Prec,expr& Target,error_t(parser::*/*OnTokenError*/)(text)=&parser::S71,error_t(parser::*/*OnPrecError*/)(text,text)=&parser::S60) {
2308 while(!Target.Finished) {
2309 auto PostfixStart = Cursor;
2310 auto TokenLeading = capture_t();
2311 auto PostfixToken = Cursor.Token;
2313 GRAMMAR_ASSERT(CheckToken());
2314 if(!(Prec<=PostfixToken->PostfixTokenPrec || (Target.MarkupStart && MarkupPostfixes.Has(PostfixToken)))) {
2316 Cursor=PostfixStart; //backtrack NewLine's ScanToken
2317 return Target.OnFinish(*this);
2318 }
2319 if(!Target.AllowPostfixes.Has(Cursor.Token)) // Immediate error disallowing e.g. a<=b>c per grammar.
2320 return S61(PostfixToken->Symbol);
2321 switch(nat8(Cursor.Token)) {
2322 case token(u8"&"):
2323 if(Cursor[1]=='&')
2324 return S62();
2325 goto binary_operator;
2326 case token(u8"|"):
2327 if(Cursor[1]=='|')
2328 return S62();
2329 goto binary_operator;
2330 case token(u8">"):
2331 if(Target.MarkupStart)
2333 goto binary_operator; // Else continue on as greater operator.
2335 case token(u8"*"): case token(u8"/"):
2336 case token(u8"+"): case token(u8"-"):
2337 case token(u8"to"): case token(u8".."): case token(u8"->"):
2338 case token(u8">="):
2339 case token(u8"<="):
2340 case token(u8"<>"):
2341 case token(u8"="):
2342 case token(u8"and"): /*case token(u8"&&"):*/
2343 case token(u8"or"): /*case token(u8"||"):*/ {
2344 // Mul := Prefix { Space ('*' | '/' | '&' ) Scan Prefix }
2345 // Add := Mul { Space ('+' | '-' ) Scan Mul }
2346 // To := Add [ Space ("to" Key | ".." | "->") Scan To ]
2347 // Choose := To [ Space ('|' ) Scan Choose ]
2348 // Greater := Choose [ Space ('>' | ">=" ) Scan Greater ]
2349 // Less := Greater [ Space ('<' | "<=" ) Scan &(Choose Space !'>' !'>=') Less]
2350 // NotEq := Less { Space ('<>' ) Scan Choose }
2351 // Eq := NotEq { Space ('=' ) Scan NotEq }
2352 // And := Not { Space ("and" Key ) Scan And }
2353 // Or := And { Space ("or" Key ) Scan Or }
2354 Target.MarkupTag=u8"";
2355 EatToken();
2356 capture_t Leading;
2357 GRAMMAR_RUN(Scan(Leading));
2358 GRAMMAR_RUN(WhenExpr(PostfixToken->Symbol,PostfixToken->PostfixRightPrec(),PostfixToken->PostfixRightPrec(),&Target,Leading,[&](expr& Right)->result_t<nothing> {
2359 return UpdateFrom(Target,Right.Trailing,Gen.InfixToken(
2360 SnipFinished(Target.Start,Right),PostfixToken->PostfixMode,
2361 ApplyTrailing(Target),PostfixToken->Symbol,*Right
2362 ));
2363 },PostfixToken->PostfixAllowMask));
2364 continue;
2365 }
2366 case token(u8"^"): case token(u8"?"): case token(u8"ref"): {
2367 // Call := Base {Space Postfix}
2368 // Postfix := .. | ('^' | '?' | "ref" | ..)
2369 Target.MarkupTag=u8"";
2370 EatToken();
2371 GRAMMAR_RUN(UpdateSpaceTrailing(Target,Gen.PostfixToken(
2372 Snip(Target.Start),PostfixToken->PostfixMode,
2373 ApplyTrailing(Target),PostfixToken->Symbol)));
2374 continue;
2375 }
2376 case token(u8"["): {
2377 // Call := Base {Space Postfix}
2378 // Postfix := .. | (.. | '[' List ']' ..)
2379 Target.MarkupTag=u8"";
2380 EatToken();
2381 GRAMMAR_LET(Block0,List(u8"]",&parser::S83,Cursor,capture_t(),punctuation::Brackets,Cursor));
2382 GRAMMAR_RUN(RequireClose(Target.Start,u8"[",u8"]",&parser::S83));
2383 Block0.BlockSnippet=Snip(PostfixStart);
2384 GRAMMAR_RUN(UpdateSpaceTrailing(Target,Gen.Call(
2385 Snip(Target.Start),mode::Closed,
2386 ApplyTrailing(Target),Block0)));
2387 continue;
2388 }
2389 case token(u8"@"): {
2390 // Expr := Fun {'@' Space Call} StopExpr | ..
2391 EatToken();
2392 GRAMMAR_LET(Leading,Space());
2393 GRAMMAR_RUN(WhenExpr(PostfixToken->Symbol,prec::Call,prec::Call,&Target,Leading,[&](expr& Right)->result_t<nothing> {
2394 return UpdateFrom(Target,Right.Trailing,Gen.PostfixAttribute(
2395 SnipFinished(Target.Start,Right),
2396 ApplyTrailing(Target),*Right));
2397 }));
2398 continue;
2399 }
2400 case token(u8"at"): case token(u8"of"): {
2401 // Postfix := .. | ("at"|"of") Key (KeyBlock | Fun)
2402 Target.MarkupTag=u8"";
2403 EatToken();
2404 GRAMMAR_LET(PunctuationLeading,Space());
2405 GRAMMAR_LET(Right,KeyBlock(prec::Fun,Target,PostfixStart,capture_t(),PostfixToken->Symbol,PunctuationLeading));
2406 GRAMMAR_RUN(UpdateFrom(Target,Right.BlockTrailing,Gen.Call(
2407 SnipFinished(Target.Start,Right),PostfixToken->PostfixMode,
2408 ApplyTrailing(Target),Right)));
2409 continue;
2410 }
2411 case token(u8"=>"): case token(u8":="): case token(u8"next"): {
2412 // Def := .. { .. | Space ":=" Space (BraceInd | Def) | ..} | ..
2413 // Fun := Def {Space ("=>" Space | "next" Key) (BraceInd | Fun) } StopFun
2414 EatToken();
2415 GRAMMAR_LET(Right,BraceInd(PostfixToken->Symbol,PostfixToken->PostfixRightPrec(),Target));
2416 GRAMMAR_RUN(UpdateFrom(Target,Right.BlockTrailing,Gen.InfixBlock(
2417 SnipFinished(Target.Start,Right),
2418 ApplyTrailing(Target),PostfixToken->Symbol,Right)));
2419 continue;
2420 }
2421 case token(u8"."): {
2422 // Postfix := .. | (.. | ScanKey '.' QualIdent)
2423 if(!IsSpace(Cursor[1]) /*&& !IsEnding(Cursor[1])*/) {
2424 Target.MarkupTag=u8"";
2425 EatToken();
2426 Gen.CaptureAppend(Target.Trailing.TrailingCapture,TokenLeading);
2427 GRAMMAR_LET(Id,QualIdent(u8".",Target,false));
2428 GRAMMAR_RUN(UpdateSpaceTrailing(Target,Gen.InfixToken(
2429 Snip(Target.Start),PostfixToken->PostfixMode,
2430 ApplyTrailing(Target),PostfixToken->Symbol,Id)));
2431 continue;
2432 }
2433 [[fallthrough]]; // Else it's a macro invocation handled below.
2434 }
2435 case token(u8"{"): case token(u8":"): case token(u8"<"): case token(u8"("):
2436 case token(u8"in"): case token(u8"with"):
2437 case token(u8":>"): case token(u8";"): case token(u8","): markup_postfix: {
2438 // Invoke := [Specs] (Paren [Specs] (Block | Do) | Block [[Specs] Do]) (Until | !Until)
2439 // Def := (Or | ..) {&In Def | ..}
2440 invoke InvokeTarget{u8"macro invocation",Target,Target.Start,token::None(),token_set{u8"do"},token_set{u8"until",u8"catch"}};
2442 if(Cursor.Pos==PostfixStart.Pos && InPrefixes.Has(PostfixToken)) {
2443 if(Prec>prec::Def)
2444 goto finished_postfix;
2445 // Parse Def and generate a tokenless definition of Target.
2446 GRAMMAR_RUN(WhenExpr(PostfixToken->Symbol,prec::Def,prec::Def,&Target,capture_t(),[&](expr& InExpr) {
2447 auto InBlock=SingletonBlock(InExpr.Start,InExpr);
2448 return UpdateFrom(Target,InBlock.BlockTrailing,Gen.InfixBlock(
2449 SnipFinished(Target.Start,InExpr),
2450 ApplyTrailing(Target),u8"",InBlock));
2451 }));
2452 }
2453 continue;
2454 }
2455 case token(u8"is"): {
2456 // Def := .. (.. | ScanKey "is" Key (KeyBlock | Def) | ..)
2457 EatToken();
2458 GRAMMAR_LET(PunctuationLeading,Space());
2459 GRAMMAR_LET(Right,KeyBlock(prec::Def,Target,PostfixStart,TokenLeading,u8"is",PunctuationLeading));
2460 GRAMMAR_RUN(UpdateFrom(Target,Right.BlockTrailing,Gen.InfixBlock(
2461 SnipFinished(Target.Start,Right),
2462 ApplyTrailing(Target),u8"is",Right)));
2463 continue;
2464 }
2465 case token(u8"over"): case token(u8"when"): case token(u8"where"): case token(u8"while"): {
2466 // Def := .. (.. | Space "where" Key (KeyBlock | Defs) | ..)
2467 // Fun := Def { Space ('over' | 'upon' | 'while') Key (KeyBlock | Defs) | ..} StopFun
2468 EatToken();
2469 GRAMMAR_LET(Right,KeyBlockDefs(Target,PostfixStart,TokenLeading,PostfixToken->Symbol));
2470 GRAMMAR_RUN(UpdateFrom(Target,Right.BlockTrailing,Gen.InfixBlock(
2471 SnipFinished(Target.Start,Right),
2472 ApplyTrailing(Target),PostfixToken->Symbol,Right)));
2473 continue;
2474 }
2475 case token::NewLine():
2476 GRAMMAR_SET(PostfixToken,ScanKey(TokenLeading,token_set{u8"is",u8"with",u8"{",u8">",u8":>",u8".",u8",",u8";"}));
2477 goto token_leading_loop;
2478 case token(u8"=="):
2479 return S65();
2480 case token(u8"+="): case token(u8"-="): case token(u8"*="): case token(u8"/="):
2481 return S66(PostfixToken->Symbol);
2482 default:
2483 Err(); // Should be unreachable due to precedence.
2484 }
2485 }
2486 return nothing{};
2487 }
2488 result_t<expr*> FinishExpr(token Token,prec FinishPrec,expr& SourceExpr) {
2489 // Preemptively finish and generate syntax for all expressions tighter than FinishPrec,
2490 // producing an error if there is no expression at or looser than FinishPrec.
2491 GRAMMAR_ASSERT(FinishPrec>=prec::Def); // Vital because prec.Def and looser don't handle preemptive finish.
2492 for(auto Expr=&SourceExpr; Expr; Expr=Expr->OuterExpr) {
2493 if(Expr->FinishPrec<=FinishPrec)
2494 if(Token==token::None() || Expr->AllowPostfixes.Has(Token))
2495 return Expr;
2496 Expr->ExprStop=true;
2497 if(!Expr->Finished)
2498 GRAMMAR_RUN(Expr->OnFinish(*this));
2499 }
2500 return nullptr;
2501 }
// Expression frame that calls a caller-supplied callable F on itself when the expression is finished
// and keeps the value F produced in Result.
2502 template<class f> struct when_expr: expr {
// Type obtained by calling an f with an expr&; the null-pointer dereferences appear only inside decltype,
// which is an unevaluated operand.
2503 using result_type = decltype((*(f*)nullptr)(*(expr*)nullptr));
2504 f F;
2505 result_type Result;
2506 when_expr(prec FinishPrec0,expr* OuterExpr0,token_set PostfixAllow0,const cursor& Start0,
2507 const capture_t& ExprLeading0,const f& F0,expr* QualIdentTarget0=nullptr):
// NOTE(review): listing line 2508 is absent from this extract (numbering jumps from 2507 to 2509);
// presumably it held the member-initializer list and the constructor's opening brace - confirm against the real header.
2509 this->ExprLeading=ExprLeading0;
2510 }
// Parses one expression into this frame: Base first, then Postfix, both at ParsePrec.
// Parser.ExprDepth is set to ExprDepth+1 through a scoped_guard for the duration of the call
// (presumably restored when the guard is destroyed - confirm in scoped_guard), and parsing fails with
// Parser.S99() once the depth exceeds VERSE_MAX_EXPR_DEPTH.
2511 result_t<nothing> Parse(parser& Parser,text What,prec ParsePrec,error_t(parser::*OnTokenError)(text),error_t(parser::*OnPrecError)(text,text)) {
2512 scoped_guard ExprDepthGuard(Parser.ExprDepth, Parser.ExprDepth + 1);
2513 if (Parser.ExprDepth > VERSE_MAX_EXPR_DEPTH)
2514 return Parser.S99();
2515 GRAMMAR_RUN(Parser.Base(What,ParsePrec,*this,OnTokenError,OnPrecError));
2516 GRAMMAR_RUN(Parser.Postfix(What,ParsePrec,*this,OnTokenError,OnPrecError));
// A successful Base+Postfix pass is expected to have finished this expression.
2517 GRAMMAR_ASSERT(this->Finished);
2518 return nothing{};
2519 }
// Finish hook: runs the base expr::OnFinish, wraps the expression's syntax with its leading capture
// via Gen.Leading, then calls F on this frame and stores the value it yields in Result.
2520 result_t<nothing> OnFinish(parser& Parser) override {
2521 GRAMMAR_RUN(expr::OnFinish(Parser));
2522 this->ExprSyntax = Parser.Gen.Leading(this->ExprLeading,**this);
2523 GRAMMAR_SET(Result,F(*this));
// F is expected to have consumed or cleared the trailing state by the time it returns.
2524 GRAMMAR_ASSERT(!this->Trailing);
2525 return nothing{};
2526 }
2527 };
// Deduction guide so that when_expr(...) can be written without spelling out the callable's type.
// NOTE(review): the guide lists a trailing bool parameter that the constructor signature shown above does not have.
2528 template<class f> when_expr(prec,expr*,token_set,const cursor&,const capture_t&,const f&,expr* e=nullptr,bool b=false)->when_expr<f>;
2529 template<class f> typename when_expr<f>::result_type WhenExpr(text What,prec ParsePrec,prec FinishPrec,expr* OuterExpr,
2530 const capture_t& ExprLeading,const f& F,
2531 token_set AllowPostfixes=AllTokens,error_t(parser::*OnTokenError)(text)=&parser::S71,expr* QualIdentTarget0=nullptr) {
2532 // Start parsing an expression which may be finished preemptively by FinishExpr.
2533 auto Target = when_expr(FinishPrec,OuterExpr,AllowPostfixes,Cursor,ExprLeading,F,QualIdentTarget0);
2534 GRAMMAR_RUN(Target.Parse(*this,What,ParsePrec,OnTokenError,&parser::S60));
2535 return Target.Result;
2536 }
2537 result_t<syntax_t> MarkupExpr(expr* OuterMarkup,cursor MarkupStart) {
2538 capture_t Leading;
2539 GRAMMAR_RUN(Scan(Leading));
2540 if(Cursor[0]=='/')
2541 return S42();
2542 auto Expr = when_expr(prec::Call,nullptr,AllTokens,Cursor,Leading,[&](expr& Expr)->result_t<nothing> {
2543 Expr.Trailing = trailing{};
2544 return nothing{};
2545 });
2546 Expr.MarkupStart = MarkupStart;
2547 Expr.OuterMarkup = OuterMarkup;
2548 GRAMMAR_RUN(Expr.Parse(*this,u8"markup",prec::Call,&parser::S74,&parser::S64));
2549 if(!Expr.MarkupFinished)
2550 return S41();
2551 return *Expr;
2552 }
2553
2554 // Separated expressions.
// Parses one or more comma-separated expressions at precedence Prec and collects their syntaxes in a block.
// Leading is used as the leading capture of the next expression and is reset after each one.
// The block's Form is set to form::Commas only once at least one ',' has been consumed.
2555 result_t<block_t> Commas(text What,prec Prec,cursor Start,capture_t& Leading,error_t(parser::*OnTokenError)(text)) {
2556 // Commas := Expr {',' Scan Expr}
2557 block_t Block0;
2558 for(;;) {
2559 bool More=false;
// The lambda runs when the expression finishes: it tries to eat a following ',' and records the result in More.
2560 GRAMMAR_RUN(WhenExpr(What,Prec,Prec,nullptr,Leading,[&](expr& Expr)->result_t<nothing> {
2561 More=Eat(u8",");
2562 if(More)
2563 ApplyTrailing(Expr,true);
2564 else
// Last element: the two statements below are joined by a comma operator, so both belong to this else branch.
2565 Block0.BlockSnippet=Snip(Start,*Expr.Trailing.TrailingStart),
2566 Block0.BlockTrailing.MoveFrom(Expr.Trailing);
2567 Leading=capture_t();
2568 Gen.SyntaxesAppend(Block0.Elements,*Expr);
2569 return nothing{};
// NOTE(review): listing line 2570 is absent from this extract (numbering jumps from 2569 to 2571);
// it presumably closes the lambda and the WhenExpr/GRAMMAR_RUN call - confirm against the real header.
2571 if(!More)
2572 return Block0;
2573 Block0.Form=form::Commas;
// Scan what follows the ',' into Leading so that it becomes the next expression's leading capture.
2574 GRAMMAR_RUN(Scan(Leading));
2575 }
2576 }
// Parses a list: Commas groups separated by ';' or line endings, up to a token in StopList.
// A list with a single group and no separator-followed continuation takes over that group's Form,
// Elements and trailing capture directly; otherwise each group is wrapped via Gen.Parenthesis and
// appended as one element of a form::List block. Returns S77() if parsing stops on a non-StopList token.
2577 result_t<block_t> List(text What,error_t(parser::*OnTokenError)(text),cursor BlockStart,const capture_t& PunctuationLeading,punctuation Punctuation,cursor CommasStart,const capture_t& Leading=capture_t()) {
2578 // Separator := (';'|Ending) Scan
2579 // List := push; set LinePrefix=""; Scan [Commas {Separator Commas} [Separator]]; pop
// NOTE(review): SavedContext is not used on any visible line; presumably it is restored on a line missing from this extract.
2580 auto SavedContext = Context;
// Some becomes true once any group has been emitted as a separate list element.
2581 bool Some = false;
2582 Context.LinePrefix = false;
2583 auto ListBlock = block_t{};
2584 ListBlock.Form = form::List;
2585 ListBlock.PunctuationLeading = PunctuationLeading;
2586 ListBlock.Punctuation = Punctuation;
// Start from the caller-provided Leading capture; the Scan below writes into the same capture.
2587 ListBlock.ElementsTrailing = Leading;
2588 GRAMMAR_RUN(Scan(ListBlock.ElementsTrailing));
// An immediate stop token means an empty list: the loop is skipped entirely.
2589 if(!StopList.Has(Cursor.Token))
2590 for(;;) {
// NOTE(review): listing line 2591 is absent from this extract; it presumably parses the next Commas group into CommasBlock.
2592 ApplyTrailing(CommasBlock,Cursor);
2593 CommasBlock.BlockSnippet = Snip(CommasStart);
2594 bool More = false;
2595 if(Cursor.Token==token(u8";") || Ending()) {
2596
2597 // Attribute Commas-trailing [';'] Space &NewLine to CommasBlock, following Scan to ListBlock.
2598 auto SemicolonStart = Cursor;
2599 if(Eat(u8";"))
2600 Gen.Semicolon(CommasBlock.ElementsTrailing,Snip(SemicolonStart));
// NOTE(review): listing line 2601 is absent from this extract; it presumably defines SemicolonTrailing used below.
2602 Gen.CaptureAppend(CommasBlock.ElementsTrailing,SemicolonTrailing);
2603
2604 // Start parsing next list element.
2605 CommasBlock.BlockSnippet = Snip(CommasStart);
2606 CommasStart = Cursor;
2607 GRAMMAR_RUN(Scan(ListBlock.ElementsTrailing));
// Continue only if the separator is followed by something other than a stop token.
2608 More = !StopList.Has(Cursor.Token);
2609 }
2610 if(More || Some) {
2611 // Multiple Semicolon or NewLine separated elements.
2612 Some = true;
2613 GRAMMAR_LET(CommasSyntax,Gen.Parenthesis(CommasBlock));
2614 Gen.SyntaxesAppend(ListBlock.Elements,CommasSyntax);
2615 }
2616 else {
2617 // Single Commas block.
2618 Gen.CaptureAppend(CommasBlock.ElementsTrailing,ListBlock.ElementsTrailing);
2619 ListBlock.Form = CommasBlock.Form;
2620 ListBlock.Elements = CommasBlock.Elements;
2621 ListBlock.ElementsTrailing = CommasBlock.ElementsTrailing;
2622 }
2623 if(!More)
2624 break;
2625 }
2626 if(StopList.Has(Cursor.Token)) {
// NOTE(review): listing line 2627 is absent from this extract (numbering jumps from 2626 to 2628).
2628 ListBlock.BlockSnippet = Snip(BlockStart);
2629 return ListBlock;
2630 }
2631 return S77();
2632 }
// NOTE(review): the signature line of this member (listing line 2633) is absent from this extract;
// judging by the call Parser.CheckResult(Parser.File()) it takes no arguments - confirm against the real header.
// Parses a whole file: an optional leading EF BB BF byte sequence (the UTF-8 byte-order mark) is stepped over
// with Next(3), a lone 0xEF not followed by BB BF fails with S01(), and the remainder is parsed as an
// unpunctuated List whose block is handed to Gen.File.
2634 // File := [0oEF 0oBB 0oBF] set Nest=true; set BlockInd=""; set LineInd=""; List Scan end
2635 if(nat8(Cursor[0])==0xEF) {
2636 if(nat8(Cursor[1])==0xBB && nat8(Cursor[2])==0xBF) Next(3);
2637 else return S01();
2638 }
2639 GRAMMAR_LET(Block0,List(u8"",&parser::S70,Cursor,capture_t(),punctuation::None,Cursor));
2640 return Gen.File(Block0);
2641 }
2642 result_t<syntax_t> CheckResult(const result_t<syntax_t>& Result) {
2643 GRAMMAR_LET(Syntax,Result);
2644 if(Cursor[0]!=0)
2645 return S70(u8"");
2646 if(nat(Cursor.Pos-InputString)!=InputLength)
2647 return S01();
2648 return Syntax;
2649 }
2650
2651 // Friends.
2652 template<class t> friend result<typename t::syntax_t,typename t::error_t> File(t& Gen,nat n,const char8* s,nat Line);
2653};
2654
2655//--------------------------------------------------------------------------------------------------------------------------------------------------------------
2656// Default Generator Framework inheriting a user generator.
2657
2658template<class gen_t> struct generate: gen_t {
2659 using syntax_t = typename gen_t::syntax_t;
2660 using syntaxes_t = typename gen_t::syntaxes_t;
2661 using error_t = typename gen_t::error_t;
2662 using capture_t = typename gen_t::capture_t;
2664 template<class t> using result_t = result<t,error_t>;
2665
2666 // Passthrough constructor.
2667 template<class... ts> generate(const ts&... TS): gen_t(TS...) {}
2668
2669 // Default translators from concrete syntax callbacks to abstract syntax callbacks.
2670 result<syntax_t,error_t> Units(const snippet& Snippet,const syntax_t& Num,text Units) const {
2671 GRAMMAR_LET(UnitsIdent,this->Ident(snippet{},u8"units'",Units,u8"'"));
2672 syntaxes_t Parameters;
2673 this->SyntaxesAppend(Parameters,Num);
2674 return this->Call(Snippet,mode::Open,UnitsIdent,block_t{Snippet,Parameters});
2675 }
2677 if(this->SyntaxesLength(Block.Elements)!=1) {
2678 GRAMMAR_LET(Macro,this->Native(snippet{},u8"array"));
2679 return this->Invoke(Block.BlockSnippet,Macro,Block,nullptr,nullptr);
2680 }
2681 else return this->SyntaxesElement(Block.Elements,0);
2682 }
2684 syntaxes_t Characters;
2685 nat n = 0u; // Inexplicably gives "warning C4700: uninitialized local variable 'Length' used" when not initialized separately in Visual Studio 2019 14.29.30139
2686 n=this->CaptureLength(Capture);
2687 for(nat i=0; i<n; i++) {
2688 GRAMMAR_LET(ch,this->Char8(snippet{},this->CaptureElement(Capture,i)));
2689 this->SyntaxesAppend(Characters,ch);
2690 }
2691 GRAMMAR_LET(Macro,this->Native(snippet{},u8"array"));
2692 return this->Invoke(Snippet,Macro,block_t{snippet{},Characters,form::Commas},nullptr,nullptr);
2693 }
2694 result_t<syntax_t> StringInterpolate(const snippet& Snippet,place Place,bool /*Brace*/,const block_t& Block) const {
2695 GRAMMAR_ASSERT(Place==place::String || Place==place::Content);
2696 GRAMMAR_LET(FunctionSyntax,this->Native(snippet{},Place==place::String? u8"ToString": u8"ToMarkup"));
2697 return this->Call(Snippet,mode::Open,FunctionSyntax,Block);
2698 }
2699 result_t<syntax_t> String(const snippet& Snippet,const syntaxes_t& Splices) const {
2700 if(this->SyntaxesLength(Splices)==1)
2701 return this->SyntaxesElement(Splices,0);
2702 if(this->SyntaxesLength(Splices)==0)
2703 return this->Parenthesis(block_t{});
2704 GRAMMAR_LET(FunctionSyntax,this->Native(snippet{},u8"Concatenate"));
2705 return this->Call(Snippet,mode::Open,FunctionSyntax,block_t{snippet{},Splices,form::Commas});
2706 }
2707 result_t<syntax_t> Content(const snippet& Snippet,const syntaxes_t& Splices) const {
2708 return String(Snippet,Splices);
2709 }
2710 result_t<syntax_t> Contents(const snippet& Snippet,const capture_t& /*Leading*/,const syntaxes_t& Splices) const {
2711 GRAMMAR_LET(Macro,this->Native(snippet{},u8"array"));
2712 return this->Invoke(Snippet,Macro,block_t{snippet{},Splices},nullptr,nullptr);
2713 }
// Lowers a markup invocation to an ordinary macro invocation: the markup Content is appended to the
// elements of the last clause present (DoClause if given, else Clause, else a fresh empty block)
// before calling Invoke. All capture arguments and StartToken are ignored.
2714 result_t<syntax_t> InvokeMarkup(const snippet& Snippet,text /*StartToken*/,const capture_t& /*Leading*/,const syntax_t& Macro,block_t* Clause,block_t* DoClause,const capture_t& /*TokenLeading*/,const capture_t& /*PreContent*/,const syntax_t& Content,const capture_t& /*PostContent*/) const {
2715 GRAMMAR_LET(DefineMacro, this->Native(snippet{},u8"operator':='"));
2716 GRAMMAR_LET(ContentIdent,this->Ident(snippet{},u8"Content",u8"",u8""));
2717 block_t DefineClause; this->SyntaxesAppend(DefineClause .Elements,ContentIdent);
2718 block_t DefineDoClause; this->SyntaxesAppend(DefineDoClause.Elements,Content );
// NOTE(review): listing line 2719 is absent from this extract; it presumably builds ContentSyntax from
// DefineMacro, DefineClause and DefineDoClause - confirm against the real header.
// LastClause is a copy, so the caller's Clause/DoClause blocks are not modified by the append below.
2720 auto LastClause = !Clause? block_t{}: DoClause? *DoClause: *Clause;
2721 this->SyntaxesAppend(LastClause.Elements,ContentSyntax);
// NOTE(review): if Clause is null while DoClause is not, *Clause below dereferences a null pointer;
// presumably callers never pass a do-clause without a clause - verify.
2722 return this->Invoke(Snippet,Macro,!DoClause? LastClause: *Clause,DoClause? &LastClause: nullptr,nullptr);
2723 }
2724 result_t<syntax_t> PrefixToken(const snippet& Snippet,mode Mode,text Symbol,const block_t& Block,bool Lift, const syntaxes_t& /*VarAttributes*/ = {}) const {
2725 if(Symbol==u8"in")
2726 Symbol=u8":";
2727 if(Lift)
2728 return this->Err(Snippet,"S98","Feature is not currently supported");
2729 GRAMMAR_LET(Macro,/*IsAlnum(Symbol[0])?
2730 this->Ident(snippet{},Symbol,u8"",u8""):*/
2731 this->Ident(snippet{},u8"prefix'",Symbol,u8"'"));
2732 if(Mode==mode::Open || Mode==mode::Closed)
2733 return this->Call(Snippet,Mode,Macro,Block);
2734 else if(Mode==mode::With)
2735 return this->Invoke(Snippet,Macro,Block,nullptr,nullptr);
2736 else Err();
2737 }
2738 result_t<syntax_t> PrefixBrackets(const snippet& Snippet,const block_t& Left,const block_t& Right) const {
2739 if(Right.Punctuation==punctuation::Braces)
2740 return this->Err(Snippet,"S98","Feature is not currently supported");
2741 if(this->SyntaxesLength(Left.Elements)==0) {
2742 GRAMMAR_LET(Macro,this->Ident(snippet{},u8"prefix'[]'",u8"",u8""));
2743 return this->Call(Snippet,mode::Closed,Macro,Right);
2744 }
2745 GRAMMAR_LET(Macro,this->Ident(snippet{},u8"operator'[]'",u8"",u8""));
2746 block_t Parameters;
2747 GRAMMAR_LET(LeftSyntax ,this->Parenthesis(Left )); this->SyntaxesAppend(Parameters.Elements,LeftSyntax);
2748 GRAMMAR_LET(RightSyntax,this->Parenthesis(Right)); this->SyntaxesAppend(Parameters.Elements,RightSyntax);
2749 Parameters.Form=form::Commas;
2750 return this->Call(Snippet,mode::Closed,Macro,Parameters);
2751 }
2752 result_t<syntax_t> PostfixToken(const snippet& Snippet,mode Mode,const syntax_t& Left,text Symbol) const {
2753 GRAMMAR_LET(Macro,this->Ident(snippet{},u8"operator'",Symbol,u8"'"));
2754 block_t Parameters;
2755 this->SyntaxesAppend(Parameters.Elements,Left);
2756 if(Mode==mode::Open || Mode==mode::Closed)
2757 return this->Call(Snippet,Mode,Macro,Parameters);
2758 else if(Mode==mode::With)
2759 return this->Invoke(Snippet,Macro,Parameters,nullptr,nullptr);
2760 else Err();
2761 }
2762 result_t<syntax_t> InfixToken(const snippet& Snippet,mode Mode,const syntax_t& Left,text Symbol,const syntax_t& Right) const {
2763 if(Symbol==u8"to")
2764 Symbol=u8"->";
2765 GRAMMAR_LET(Macro,this->Ident(snippet{},u8"operator'",Symbol,u8"'"));
2766 block_t Parameters;
2767 this->SyntaxesAppend(Parameters.Elements,Left);
2768 this->SyntaxesAppend(Parameters.Elements,Right);
2769 if(Mode==mode::Closed || Mode==mode::Open)
2770 return Parameters.Form=form::Commas, this->Call(Snippet,Mode,Macro,Parameters);
2771 else if(Mode==mode::With)
2772 return this->Invoke(Snippet,Macro,Parameters,nullptr,nullptr);
2773 else Err();
2774 }
// Lowers an infix operator whose right-hand side is a block into an invocation of the identifier
// assembled from u8"operator'", the symbol and u8"'": the left syntax goes into the first clause and
// Right is passed as the second clause. The symbols u8"", u8"is" and u8"=" are all treated as u8":=".
2775 result_t<syntax_t> InfixBlock(const snippet& Snippet,const syntax_t& LeftSyntax,text Symbol,const block_t& Right) const {
2776 if(Symbol==u8"" || Symbol==u8"is" || Symbol==u8"=")
2777 Symbol=u8":=";
// NOTE(review): listing line 2778 is absent from this extract; it presumably declares the block_t LeftBlock used below.
2779 this->SyntaxesAppend(LeftBlock.Elements,LeftSyntax);
2780 GRAMMAR_LET(Macro,this->Ident(snippet{},u8"operator'",Symbol,u8"'"));
2781 return this->Invoke(Snippet,Macro,LeftBlock,&Right,nullptr);
2782 }
2783 syntax_t Leading(const capture_t& /*Capture*/,const syntax_t& Syntax) const {
2784 return Syntax;
2785 }
2786 syntax_t Trailing(const syntax_t& Syntax,const capture_t& /*Capture*/) const {
2787 return Syntax;
2788 }
2790 return Parenthesis(Block);
2791 }
2792
2793 // String callbacks that can contribute to abstract syntax.
2794 // In all string callbacks, every non-empty Snippet's text span is guaranteed to be inside the parser's input string.
2795 void Text(capture_t& Capture,const snippet& Snippet,place Place) const {
2796 if(Place==place::Content || Place==place::String)
2797 this->gen_t::Text(Capture,Snippet,Place);
2798 }
2799 void NewLine(capture_t& Capture,const snippet& /*Snippet*/,place Place) const {
2800 if(Place==place::Content) {
2801 char8 Char8 = '\n'; // We normalize markup NewLine to \n.
2802 snippet NewSnippet = {};
2803 NewSnippet.Text = text{&Char8,&Char8+1};
2804 this->gen_t::Text(Capture,NewSnippet,Place);
2805 }
2806 }
2808 if(Place==place::Content || Place==place::String) {
2809 // We pass through backslashed control characters as-is.
2810 char8 Char8 = Backslashed=='n'? '\n': Backslashed=='r'? '\r': Backslashed=='t'? '\t': Backslashed;
2811 snippet NewSnippet = {};
2812 NewSnippet.Text = text{&Char8,&Char8+1};
2813 this->gen_t::Text(Capture,NewSnippet,Place);
2814 }
2815 }
2816
2817 // Optional string callbacks which don't contribute to abstract syntax.
// Each default below has an empty body and ignores all of its arguments, so comments, indentation,
// blank lines, semicolons and markup delimiters leave the captures untouched unless overridden.
2818 void LineCmt(capture_t& /*Capture*/,const snippet& /*Snippet*/,place /*Place*/,const capture_t& /*Comments*/) const {}
2819 void BlockCmt(capture_t& /*Capture*/,const snippet& /*Snippet*/,place /*Place*/,const capture_t& /*Comments*/) const {}
2820 void IndCmt(capture_t& /*Capture*/,const snippet& /*Snippet*/,place /*Place*/,const capture_t& /*Comments*/) const {}
2821 void Indent(capture_t& /*Capture*/,const snippet& /*Snippet*/,place /*Place*/) const {}
2822 void BlankLine(capture_t& /*Capture*/,const snippet& /*Snippet*/,place /*Place*/) const {}
2823 void Semicolon(capture_t& /*Capture*/,const snippet& /*Snippet*/) const {}
// NOTE(review): listing line 2824 is absent from this extract (numbering jumps from 2823 to 2825);
// another callback definition presumably belongs here - confirm against the real header.
2825 void MarkupStart(capture_t& /*Capture*/,const snippet& /*Snippet*/) const {}
2826 void MarkupTag(capture_t& /*Capture*/,const snippet& /*Snippet*/) const {}
2827 void MarkupStop(capture_t& /*Capture*/,const snippet& /*Snippet*/) const {}
2828 void LinePrefix(capture_t& /*Capture*/,const snippet& /*Snippet*/) const {}
2829};
2830
2831//--------------------------------------------------------------------------------------------------------------------------------------------------------------
2832// Public parsing interface.
2833
2834template<class gen_t> result<typename gen_t::syntax_t,typename gen_t::error_t> File(gen_t& Gen,nat n,const char8* s,nat Line=1) {
2835 auto Parser=parser(Gen,n,s,Line);
2836 return Parser.CheckResult(Parser.File());
2837}
2838
2839}}
RAD_S16 S16
Definition egttypes.h:486
RAD_S64 S64
Definition egttypes.h:506
RAD_S32 S32
Definition egttypes.h:496
UE_FORCEINLINE_HINT FLinearColor operator*(float Scalar, const FLinearColor &Color)
Definition Color.h:473
AUTORTFM_INFER UE_FORCEINLINE_HINT constexpr auto Invoke(FuncType &&Func, ArgTypes &&... Args) -> decltype(((FuncType &&) Func)((ArgTypes &&) Args...))
Definition Invoke.h:44
UE_FORCEINLINE_HINT TSharedRef< CastToType, Mode > StaticCastSharedRef(TSharedRef< CastFromType, Mode > const &InSharedRef)
Definition SharedPointer.h:127
return true
Definition ExternalRpcRegistry.cpp:601
JsonWriter Close()
uint_least32_t char32_t
Definition MarketplaceKitWrapper.h:65
@ Num
Definition MetalRHIPrivate.h:234
const bool
Definition NetworkReplayStreaming.h:178
@ Stop
Definition PrecomputedVolumetricLightmapStreaming.cpp:26
char char8
Definition ReservedSymbols.cpp:14
#define GRAMMAR_RUN(e)
Definition VerseGrammar.h:33
#define VERSE_MAX_INDCMT_DEPTH
Definition VerseGrammar.h:24
#define GRAMMAR_SET(r, e)
Definition VerseGrammar.h:34
#define GRAMMAR_ASSERT(c)
Definition VerseGrammar.h:31
#define VERSE_MAX_EXPR_DEPTH
Definition VerseGrammar.h:20
#define GRAMMAR_LET(r, e)
Definition VerseGrammar.h:35
uint32 Offset
Definition VulkanMemory.cpp:4033
uint32 Size
Definition VulkanMemory.cpp:4034
if(Failed) console_printf("Failed.\n")
@ Line
Definition EnvQueryTypes.h:243
parser
Definition TestServer.py:514
@ Start
Definition GeoEnum.h:100
TValueOrError< FDocument, FParseError > Parse(const FStringView JsonText)
Definition RapidJsonUtils.cpp:233
UE_STRING_CLASS Result(Forward< LhsType >(Lhs), RhsLen)
Definition String.cpp.inl:732
const token_set BracePostfixes
Definition VerseGrammar.h:510
constexpr bool IsSpace(char8 c)
Definition VerseGrammar.h:199
constexpr bool IsNewLine(char8 c)
Definition VerseGrammar.h:200
const token_set StopDef
Definition VerseGrammar.h:509
const token_set StopList
Definition VerseGrammar.h:506
bool ParenthesizePostfix(const encoding &Encoding, prec StringPrec)
Definition VerseGrammar.h:326
const token_set BlockPostfixes
Definition VerseGrammar.h:511
const token_set AllowLess
Definition VerseGrammar.h:503
constexpr bool IsDigit(char8 c)
Definition VerseGrammar.h:203
constexpr nat ArraySize(t(&)[n])
Definition VerseGrammar.h:60
unsigned long long nat
Definition VerseGrammar.h:43
const token_set WithPostfixes
Definition VerseGrammar.h:513
const struct token_set AllTokens AllowLess AllowNotEq
Definition VerseGrammar.h:504
constexpr bool IsAlnum(char8 c)
Definition VerseGrammar.h:204
unsigned long long nat64
Definition VerseGrammar.h:42
void Err()
Definition VerseGrammar.h:63
assoc
Definition VerseGrammar.h:79
constexpr bool IsEnding(char8 c)
Definition VerseGrammar.h:201
constexpr bool IsHex(char8 c)
Definition VerseGrammar.h:205
place
Definition VerseGrammar.h:88
const token_set StopExpr
Definition VerseGrammar.h:507
nat EncodedLength(const char8 *s)
Definition VerseGrammar.h:231
unsigned int nat32
Definition VerseGrammar.h:41
bool operator==(const text &as, const text &bs)
Definition VerseGrammar.h:126
punctuation
Definition VerseGrammar.h:85
constexpr bool IsAlpha(char8 c)
Definition VerseGrammar.h:202
const token_set StopFun
Definition VerseGrammar.h:508
constexpr bool IsIdentifierQuotable(char8 c0, char8 c1)
Definition VerseGrammar.h:207
long long int64
Definition VerseGrammar.h:38
const token_set AllTokens
Definition VerseGrammar.h:502
unsigned char nat8
Definition VerseGrammar.h:39
const token_set ParenPostfixes
Definition VerseGrammar.h:512
bool ParenthesizePrefix(const encoding &Encoding, prec StringPrec)
Definition VerseGrammar.h:323
constexpr nat8 DigitValue(char8 c)
Definition VerseGrammar.h:206
const token_set MarkupPostfixes
Definition VerseGrammar.h:515
const token_set InvokePostfixes
Definition VerseGrammar.h:514
const token_set DefPostfixes
Definition VerseGrammar.h:516
constexpr token_info Tokens[]
Definition VerseGrammar.h:366
char32_t char32
Definition VerseGrammar.h:44
mode
Definition VerseGrammar.h:91
bool operator!=(const text &as, const text &bs)
Definition VerseGrammar.h:134
constexpr bool IsStringBackslashLiteral(char8 c0, char8 c1)
Definition VerseGrammar.h:208
prec
Definition VerseGrammar.h:76
const token_set InPrefixes
Definition VerseGrammar.h:505
char32 EncodedChar32(const char8 *s, nat Count)
Definition VerseGrammar.h:211
form
Definition VerseGrammar.h:82
char char8
Definition VerseGrammar.h:56
unsigned short nat16
Definition VerseGrammar.h:40
Definition Archive.h:36
@ false
Definition radaudio_common.h:23
U16 Index
Definition radfft.cpp:71
Definition VerseGrammar.h:155
capture_t PunctuationTrailing
Definition VerseGrammar.h:167
snippet BlockSnippet
Definition VerseGrammar.h:158
capture_t TokenLeading
Definition VerseGrammar.h:160
capture_t PunctuationLeading
Definition VerseGrammar.h:162
syntaxes_t Elements
Definition VerseGrammar.h:165
block(const snippet &BlockSnippet0=snippet{}, const syntaxes_t &Elements0=syntaxes_t(), form Form0=form::List)
Definition VerseGrammar.h:156
capture_t ElementsTrailing
Definition VerseGrammar.h:166
form Form
Definition VerseGrammar.h:164
syntaxes_t Specifiers
Definition VerseGrammar.h:159
punctuation Punctuation
Definition VerseGrammar.h:163
text Token
Definition VerseGrammar.h:161
Definition VerseGrammar.h:314
bool FollowingIn
Definition VerseGrammar.h:316
prec Prec
Definition VerseGrammar.h:315
encoding(prec Prec0=prec::List, bool AllowIn0=false, bool FollowingIn0=false)
Definition VerseGrammar.h:317
bool AllowIn
Definition VerseGrammar.h:316
encoding Fresh(prec Prec1, bool AllowIn1=false, bool FollowingIn0=false) const
Definition VerseGrammar.h:319
Definition VerseGrammar.h:2658
result< syntax_t, error_t > File(const block_t &Block) const
Definition VerseGrammar.h:2789
result_t< syntax_t > String(const snippet &Snippet, const syntaxes_t &Splices) const
Definition VerseGrammar.h:2699
syntax_t Leading(const capture_t &, const syntax_t &Syntax) const
Definition VerseGrammar.h:2783
result_t< syntax_t > InvokeMarkup(const snippet &Snippet, text, const capture_t &, const syntax_t &Macro, block_t *Clause, block_t *DoClause, const capture_t &, const capture_t &, const syntax_t &Content, const capture_t &) const
Definition VerseGrammar.h:2714
void LineCmt(capture_t &, const snippet &, place, const capture_t &) const
Definition VerseGrammar.h:2818
void BlockCmt(capture_t &, const snippet &, place, const capture_t &) const
Definition VerseGrammar.h:2819
void LinePrefix(capture_t &, const snippet &) const
Definition VerseGrammar.h:2828
result_t< syntax_t > Contents(const snippet &Snippet, const capture_t &, const syntaxes_t &Splices) const
Definition VerseGrammar.h:2710
result_t< syntax_t > StringInterpolate(const snippet &Snippet, place Place, bool, const block_t &Block) const
Definition VerseGrammar.h:2694
void MarkupStart(capture_t &, const snippet &) const
Definition VerseGrammar.h:2825
typename gen_t::capture_t capture_t
Definition VerseGrammar.h:2662
void MarkupTag(capture_t &, const snippet &) const
Definition VerseGrammar.h:2826
void Semicolon(capture_t &, const snippet &) const
Definition VerseGrammar.h:2823
result_t< syntax_t > InfixBlock(const snippet &Snippet, const syntax_t &LeftSyntax, text Symbol, const block_t &Right) const
Definition VerseGrammar.h:2775
result_t< syntax_t > StringLiteral(const snippet &Snippet, const capture_t &Capture) const
Definition VerseGrammar.h:2683
typename gen_t::error_t error_t
Definition VerseGrammar.h:2661
typename gen_t::syntax_t syntax_t
Definition VerseGrammar.h:2659
void MarkupTrim(capture_t &Capture) const
Definition VerseGrammar.h:2824
result_t< syntax_t > PrefixToken(const snippet &Snippet, mode Mode, text Symbol, const block_t &Block, bool Lift, const syntaxes_t &={}) const
Definition VerseGrammar.h:2724
void IndCmt(capture_t &, const snippet &, place, const capture_t &) const
Definition VerseGrammar.h:2820
void NewLine(capture_t &Capture, const snippet &, place Place) const
Definition VerseGrammar.h:2799
syntax_t Trailing(const syntax_t &Syntax, const capture_t &) const
Definition VerseGrammar.h:2786
result_t< syntax_t > Parenthesis(const block_t &Block) const
Definition VerseGrammar.h:2676
result< syntax_t, error_t > Units(const snippet &Snippet, const syntax_t &Num, text Units) const
Definition VerseGrammar.h:2670
void MarkupStop(capture_t &, const snippet &) const
Definition VerseGrammar.h:2827
result_t< syntax_t > PostfixToken(const snippet &Snippet, mode Mode, const syntax_t &Left, text Symbol) const
Definition VerseGrammar.h:2752
generate(const ts &... TS)
Definition VerseGrammar.h:2667
result_t< syntax_t > InfixToken(const snippet &Snippet, mode Mode, const syntax_t &Left, text Symbol, const syntax_t &Right) const
Definition VerseGrammar.h:2762
void BlankLine(capture_t &, const snippet &, place) const
Definition VerseGrammar.h:2822
void StringBackslash(capture_t &Capture, const snippet &, place Place, char8 Backslashed) const
Definition VerseGrammar.h:2807
void Indent(capture_t &, const snippet &, place) const
Definition VerseGrammar.h:2821
result_t< syntax_t > Content(const snippet &Snippet, const syntaxes_t &Splices) const
Definition VerseGrammar.h:2707
void Text(capture_t &Capture, const snippet &Snippet, place Place) const
Definition VerseGrammar.h:2795
result_t< syntax_t > PrefixBrackets(const snippet &Snippet, const block_t &Left, const block_t &Right) const
Definition VerseGrammar.h:2738
typename gen_t::syntaxes_t syntaxes_t
Definition VerseGrammar.h:2660
Definition VerseGrammar.h:73
Definition VerseGrammar.h:522
friend struct parser
Definition VerseGrammar.h:524
Definition VerseGrammar.h:682
friend result< typename t::syntax_t, typename t::error_t > File(t &Gen, nat n, const char8 *s, nat Line)
Definition VerseGrammar.h:171
~result()
Definition VerseGrammar.h:184
value_t * operator->()
Definition VerseGrammar.h:188
result & operator=(const result &R)
Definition VerseGrammar.h:186
const error_t & GetError() const
Definition VerseGrammar.h:189
error_t Error
Definition VerseGrammar.h:191
value_t Value
Definition VerseGrammar.h:191
result()
Definition VerseGrammar.h:176
result(const u &Value0)
Definition VerseGrammar.h:172
result(const error_t &Error0)
Definition VerseGrammar.h:177
result(const result &Other)
Definition VerseGrammar.h:178
const value_t & operator*() const
Definition VerseGrammar.h:187
result(u &&Value0)
Definition VerseGrammar.h:174
Definition VerseGrammar.h:94
~scoped_guard()
Definition VerseGrammar.h:101
scoped_guard(t &_guard_variable, const t &new_value)
Definition VerseGrammar.h:95
Definition VerseGrammar.h:139
snippet()
Definition VerseGrammar.h:143
nat StopLine
Definition VerseGrammar.h:141
nat StopColumn
Definition VerseGrammar.h:142
nat StartColumn
Definition VerseGrammar.h:142
nat StartLine
Definition VerseGrammar.h:141
text Text
Definition VerseGrammar.h:140
Definition VerseGrammar.h:112
constexpr text()
Definition VerseGrammar.h:114
const char8 * Stop
Definition VerseGrammar.h:113
constexpr char8 operator[](nat i) const
Definition VerseGrammar.h:120
const char8 * Start
Definition VerseGrammar.h:113
constexpr text(const char8 *Start0)
Definition VerseGrammar.h:116
constexpr text(const char8 *Start0, const char8 *Stop0)
Definition VerseGrammar.h:115
Definition VerseGrammar.h:337
const char8 * Symbol
Definition VerseGrammar.h:338
prec PostfixPrec
Definition VerseGrammar.h:342
const token_set & PostfixAllowMask
Definition VerseGrammar.h:345
encoding PostfixLeftEncoding(const encoding &Encoding, bool Parens) const
Definition VerseGrammar.h:346
encoding PostfixRightEncoding(const encoding &Encoding, bool Parens) const
Definition VerseGrammar.h:354
prec PostfixRightPrec() const
Definition VerseGrammar.h:361
mode PrefixMode
Definition VerseGrammar.h:340
prec PostfixTokenPrec
Definition VerseGrammar.h:341
mode PostfixMode
Definition VerseGrammar.h:344
assoc PostfixAssoc
Definition VerseGrammar.h:343
prec PrefixPrec
Definition VerseGrammar.h:339
Definition VerseGrammar.h:473
constexpr token_set operator~() const
Definition VerseGrammar.h:491
constexpr token_set(token T, ts... TS)
Definition VerseGrammar.h:475
constexpr bool Has(token T) const
Definition VerseGrammar.h:479
constexpr token_set(const char8 *S, ts... TS)
Definition VerseGrammar.h:478
constexpr token_set()
Definition VerseGrammar.h:474
constexpr token_set operator|(const token_set &Other) const
Definition VerseGrammar.h:488
constexpr token_set operator&(const token_set &Other) const
Definition VerseGrammar.h:485
Definition VerseGrammar.h:445
static constexpr token None()
Definition VerseGrammar.h:458
constexpr const token_info * operator->() const
Definition VerseGrammar.h:467
constexpr token(const char8 *Op)
Definition VerseGrammar.h:448
static constexpr token End()
Definition VerseGrammar.h:459
static constexpr token FirstParse()
Definition VerseGrammar.h:463
static constexpr token NewLine()
Definition VerseGrammar.h:460
static constexpr token Digit()
Definition VerseGrammar.h:462
nat8 Index
Definition VerseGrammar.h:446
static constexpr token Alpha()
Definition VerseGrammar.h:461
constexpr token(nat8 Index0)
Definition VerseGrammar.h:447