// Copyright Epic Games, Inc. All Rights Reserved. /*============================================================================= HlslLexer.cpp - Implementation for scanning & tokenizing hlsl =============================================================================*/ #include "HlslLexer.h" namespace CrossCompiler { #define MATCH_TARGET(S) S, (UE_ARRAY_COUNT(S) - 1) typedef FPlatformTypes::TCHAR TCHAR; static FORCEINLINE bool IsSpaceOrTab(TCHAR Char) { return Char == ' ' || Char == '\t'; } static FORCEINLINE bool IsEOL(TCHAR Char) { return Char == '\r' || Char == '\n'; } static FORCEINLINE bool IsSpaceOrTabOrEOL(TCHAR Char) { return IsEOL(Char) || IsSpaceOrTab(Char); } static FORCEINLINE bool IsAlpha(TCHAR Char) { return (Char >= 'a' && Char <= 'z') || (Char >= 'A' && Char <= 'Z'); } static FORCEINLINE bool IsDigit(TCHAR Char) { return Char >= '0' && Char <= '9'; } static FORCEINLINE bool IsHexDigit(TCHAR Char) { return IsDigit(Char) || (Char >= 'a' && Char <= 'f') || (Char >= 'A' && Char <= 'F'); } static FORCEINLINE bool IsAlphaOrDigit(TCHAR Char) { return IsAlpha(Char) || IsDigit(Char); } struct FKeywordToken { EHlslToken Current; void* Map; FKeywordToken() : Current(EHlslToken::Invalid), Map(nullptr) {} }; typedef TMap TCharKeywordTokenMap; TCharKeywordTokenMap Keywords; static void InsertToken(const TCHAR* String, EHlslToken Token) { TCharKeywordTokenMap* Map = &Keywords; while (*String) { FKeywordToken& KT = Map->FindOrAdd(*String); ++String; if (!*String) { KT.Current = Token; return; } if (!KT.Map) { KT.Map = new TCharKeywordTokenMap(); } Map = (TCharKeywordTokenMap*)KT.Map; } } static bool MatchSymbolToken(const TCHAR* InString, const TCHAR** OutString, EHlslToken& OutToken, TStringBuilderBase* OutTokenString, bool bGreedy) { const TCHAR* OriginalString = InString; FKeywordToken* Found = Keywords.Find(*InString); if (OutString) { *OutString = OriginalString; } if (!Found) { return false; } do { ++InString; if (Found->Map) { auto* Map = (TCharKeywordTokenMap*)Found->Map; FKeywordToken* NewFound = Map->Find(*InString); if (!NewFound) { if (Found->Current != EHlslToken::Invalid) { // Don't early out on a partial match (e.g., Texture1DSample should not be 2 tokens) if (!bGreedy || !*InString) { OutToken = Found->Current; if (OutTokenString) { OutTokenString->Reset(); OutTokenString->Append(OriginalString, InString - OriginalString); } if (OutString) { *OutString = InString; } return true; } } return false; } Found = NewFound; } else if (bGreedy && *InString) { break; } else { OutToken = Found->Current; if (OutTokenString) { OutTokenString->Reset(); OutTokenString->Append(OriginalString, InString - OriginalString); } if (OutString) { *OutString = InString; } return true; } } while (*InString); return false; } namespace Tokens { static struct FStaticInitializer { FStaticInitializer() { // Math InsertToken(TEXT("+"), EHlslToken::Plus); InsertToken(TEXT("+="), EHlslToken::PlusEqual); InsertToken(TEXT("-"), EHlslToken::Minus); InsertToken(TEXT("-="), EHlslToken::MinusEqual); InsertToken(TEXT("*"), EHlslToken::Times); InsertToken(TEXT("*="), EHlslToken::TimesEqual); InsertToken(TEXT("/"), EHlslToken::Div); InsertToken(TEXT("/="), EHlslToken::DivEqual); InsertToken(TEXT("%"), EHlslToken::Mod); InsertToken(TEXT("%="), EHlslToken::ModEqual); InsertToken(TEXT("("), EHlslToken::LeftParenthesis); InsertToken(TEXT(")"), EHlslToken::RightParenthesis); // Logical InsertToken(TEXT("=="), EHlslToken::EqualEqual); InsertToken(TEXT("!="), EHlslToken::NotEqual); InsertToken(TEXT("<"), EHlslToken::Lower); InsertToken(TEXT("<="), EHlslToken::LowerEqual); InsertToken(TEXT(">"), EHlslToken::Greater); InsertToken(TEXT(">="), EHlslToken::GreaterEqual); InsertToken(TEXT("&&"), EHlslToken::AndAnd); InsertToken(TEXT("||"), EHlslToken::OrOr); // Bit InsertToken(TEXT("<<"), EHlslToken::LowerLower); InsertToken(TEXT("<<="), EHlslToken::LowerLowerEqual); InsertToken(TEXT(">>"), EHlslToken::GreaterGreater); InsertToken(TEXT(">>="), EHlslToken::GreaterGreaterEqual); InsertToken(TEXT("&"), EHlslToken::And); InsertToken(TEXT("&="), EHlslToken::AndEqual); InsertToken(TEXT("|"), EHlslToken::Or); InsertToken(TEXT("|="), EHlslToken::OrEqual); InsertToken(TEXT("^"), EHlslToken::Xor); InsertToken(TEXT("^="), EHlslToken::XorEqual); InsertToken(TEXT("!"), EHlslToken::Not); InsertToken(TEXT("~"), EHlslToken::Neg); // Statements/Keywords InsertToken(TEXT("="), EHlslToken::Equal); InsertToken(TEXT("{"), EHlslToken::LeftBrace); InsertToken(TEXT("}"), EHlslToken::RightBrace); InsertToken(TEXT(";"), EHlslToken::Semicolon); InsertToken(TEXT("if"), EHlslToken::If); InsertToken(TEXT("else"), EHlslToken::Else); InsertToken(TEXT("for"), EHlslToken::For); InsertToken(TEXT("while"), EHlslToken::While); InsertToken(TEXT("do"), EHlslToken::Do); InsertToken(TEXT("return"), EHlslToken::Return); InsertToken(TEXT("switch"), EHlslToken::Switch); InsertToken(TEXT("case"), EHlslToken::Case); InsertToken(TEXT("break"), EHlslToken::Break); InsertToken(TEXT("default"), EHlslToken::Default); InsertToken(TEXT("continue"), EHlslToken::Continue); InsertToken(TEXT("goto"), EHlslToken::Goto); // Unary InsertToken(TEXT("++"), EHlslToken::PlusPlus); InsertToken(TEXT("--"), EHlslToken::MinusMinus); // Types InsertToken(TEXT("void"), EHlslToken::Void); InsertToken(TEXT("const"), EHlslToken::Const); InsertToken(TEXT("precise"), EHlslToken::Precise); InsertToken(TEXT("bool"), EHlslToken::Bool); InsertToken(TEXT("bool1"), EHlslToken::Bool1); InsertToken(TEXT("bool2"), EHlslToken::Bool2); InsertToken(TEXT("bool3"), EHlslToken::Bool3); InsertToken(TEXT("bool4"), EHlslToken::Bool4); InsertToken(TEXT("bool1x1"), EHlslToken::Bool1x1); InsertToken(TEXT("bool2x1"), EHlslToken::Bool2x1); InsertToken(TEXT("bool3x1"), EHlslToken::Bool3x1); InsertToken(TEXT("bool4x1"), EHlslToken::Bool4x1); InsertToken(TEXT("bool1x2"), EHlslToken::Bool1x2); InsertToken(TEXT("bool2x2"), EHlslToken::Bool2x2); InsertToken(TEXT("bool3x2"), EHlslToken::Bool3x2); InsertToken(TEXT("bool4x2"), EHlslToken::Bool4x2); InsertToken(TEXT("bool1x3"), EHlslToken::Bool1x3); InsertToken(TEXT("bool2x3"), EHlslToken::Bool2x3); InsertToken(TEXT("bool3x3"), EHlslToken::Bool3x3); InsertToken(TEXT("bool4x3"), EHlslToken::Bool4x3); InsertToken(TEXT("bool1x4"), EHlslToken::Bool1x4); InsertToken(TEXT("bool2x4"), EHlslToken::Bool2x4); InsertToken(TEXT("bool3x4"), EHlslToken::Bool3x4); InsertToken(TEXT("bool4x4"), EHlslToken::Bool4x4); InsertToken(TEXT("int"), EHlslToken::Int); InsertToken(TEXT("int1"), EHlslToken::Int1); InsertToken(TEXT("int2"), EHlslToken::Int2); InsertToken(TEXT("int3"), EHlslToken::Int3); InsertToken(TEXT("int4"), EHlslToken::Int4); InsertToken(TEXT("int1x1"), EHlslToken::Int1x1); InsertToken(TEXT("int2x1"), EHlslToken::Int2x1); InsertToken(TEXT("int3x1"), EHlslToken::Int3x1); InsertToken(TEXT("int4x1"), EHlslToken::Int4x1); InsertToken(TEXT("int1x2"), EHlslToken::Int1x2); InsertToken(TEXT("int2x2"), EHlslToken::Int2x2); InsertToken(TEXT("int3x2"), EHlslToken::Int3x2); InsertToken(TEXT("int4x2"), EHlslToken::Int4x2); InsertToken(TEXT("int1x3"), EHlslToken::Int1x3); InsertToken(TEXT("int2x3"), EHlslToken::Int2x3); InsertToken(TEXT("int3x3"), EHlslToken::Int3x3); InsertToken(TEXT("int4x3"), EHlslToken::Int4x3); InsertToken(TEXT("int1x4"), EHlslToken::Int1x4); InsertToken(TEXT("int2x4"), EHlslToken::Int2x4); InsertToken(TEXT("int3x4"), EHlslToken::Int3x4); InsertToken(TEXT("int4x4"), EHlslToken::Int4x4); InsertToken(TEXT("uint"), EHlslToken::Uint); InsertToken(TEXT("uint1"), EHlslToken::Uint1); InsertToken(TEXT("uint2"), EHlslToken::Uint2); InsertToken(TEXT("uint3"), EHlslToken::Uint3); InsertToken(TEXT("uint4"), EHlslToken::Uint4); InsertToken(TEXT("uint1x1"), EHlslToken::Uint1x1); InsertToken(TEXT("uint2x1"), EHlslToken::Uint2x1); InsertToken(TEXT("uint3x1"), EHlslToken::Uint3x1); InsertToken(TEXT("uint4x1"), EHlslToken::Uint4x1); InsertToken(TEXT("uint1x2"), EHlslToken::Uint1x2); InsertToken(TEXT("uint2x2"), EHlslToken::Uint2x2); InsertToken(TEXT("uint3x2"), EHlslToken::Uint3x2); InsertToken(TEXT("uint4x2"), EHlslToken::Uint4x2); InsertToken(TEXT("uint1x3"), EHlslToken::Uint1x3); InsertToken(TEXT("uint2x3"), EHlslToken::Uint2x3); InsertToken(TEXT("uint3x3"), EHlslToken::Uint3x3); InsertToken(TEXT("uint4x3"), EHlslToken::Uint4x3); InsertToken(TEXT("uint1x4"), EHlslToken::Uint1x4); InsertToken(TEXT("uint2x4"), EHlslToken::Uint2x4); InsertToken(TEXT("uint3x4"), EHlslToken::Uint3x4); InsertToken(TEXT("uint4x4"), EHlslToken::Uint4x4); InsertToken(TEXT("uint64_t"), EHlslToken::Uint64_t); InsertToken(TEXT("uint64_t1"), EHlslToken::Uint64_t1); InsertToken(TEXT("uint64_t2"), EHlslToken::Uint64_t2); InsertToken(TEXT("uint64_t3"), EHlslToken::Uint64_t3); InsertToken(TEXT("uint64_t4"), EHlslToken::Uint64_t4); InsertToken(TEXT("uint64_t1x1"), EHlslToken::Uint64_t1x1); InsertToken(TEXT("uint64_t2x1"), EHlslToken::Uint64_t2x1); InsertToken(TEXT("uint64_t3x1"), EHlslToken::Uint64_t3x1); InsertToken(TEXT("uint64_t4x1"), EHlslToken::Uint64_t4x1); InsertToken(TEXT("uint64_t1x2"), EHlslToken::Uint64_t1x2); InsertToken(TEXT("uint64_t2x2"), EHlslToken::Uint64_t2x2); InsertToken(TEXT("uint64_t3x2"), EHlslToken::Uint64_t3x2); InsertToken(TEXT("uint64_t4x2"), EHlslToken::Uint64_t4x2); InsertToken(TEXT("uint64_t1x3"), EHlslToken::Uint64_t1x3); InsertToken(TEXT("uint64_t2x3"), EHlslToken::Uint64_t2x3); InsertToken(TEXT("uint64_t3x3"), EHlslToken::Uint64_t3x3); InsertToken(TEXT("uint64_t4x3"), EHlslToken::Uint64_t4x3); InsertToken(TEXT("uint64_t1x4"), EHlslToken::Uint64_t1x4); InsertToken(TEXT("uint64_t2x4"), EHlslToken::Uint64_t2x4); InsertToken(TEXT("uint64_t3x4"), EHlslToken::Uint64_t3x4); InsertToken(TEXT("uint64_t4x4"), EHlslToken::Uint64_t4x4); InsertToken(TEXT("ulong"), EHlslToken::Uint64_t); InsertToken(TEXT("ulong2"), EHlslToken::Uint64_t2); InsertToken(TEXT("ulong3"), EHlslToken::Uint64_t3); InsertToken(TEXT("ulong4"), EHlslToken::Uint64_t4); InsertToken(TEXT("half"), EHlslToken::Half); InsertToken(TEXT("half1"), EHlslToken::Half1); InsertToken(TEXT("half2"), EHlslToken::Half2); InsertToken(TEXT("half3"), EHlslToken::Half3); InsertToken(TEXT("half4"), EHlslToken::Half4); InsertToken(TEXT("half1x1"), EHlslToken::Half1x1); InsertToken(TEXT("half2x1"), EHlslToken::Half2x1); InsertToken(TEXT("half3x1"), EHlslToken::Half3x1); InsertToken(TEXT("half4x1"), EHlslToken::Half4x1); InsertToken(TEXT("half1x2"), EHlslToken::Half1x2); InsertToken(TEXT("half2x2"), EHlslToken::Half2x2); InsertToken(TEXT("half3x2"), EHlslToken::Half3x2); InsertToken(TEXT("half4x2"), EHlslToken::Half4x2); InsertToken(TEXT("half1x3"), EHlslToken::Half1x3); InsertToken(TEXT("half2x3"), EHlslToken::Half2x3); InsertToken(TEXT("half3x3"), EHlslToken::Half3x3); InsertToken(TEXT("half4x3"), EHlslToken::Half4x3); InsertToken(TEXT("half1x4"), EHlslToken::Half1x4); InsertToken(TEXT("half2x4"), EHlslToken::Half2x4); InsertToken(TEXT("half3x4"), EHlslToken::Half3x4); InsertToken(TEXT("half4x4"), EHlslToken::Half4x4); InsertToken(TEXT("min16float"), EHlslToken::Min16Float); InsertToken(TEXT("min16float1"), EHlslToken::Min16Float1); InsertToken(TEXT("min16float2"), EHlslToken::Min16Float2); InsertToken(TEXT("min16float3"), EHlslToken::Min16Float3); InsertToken(TEXT("min16float4"), EHlslToken::Min16Float4); InsertToken(TEXT("min16float1x1"), EHlslToken::Min16Float1x1); InsertToken(TEXT("min16float2x1"), EHlslToken::Min16Float2x1); InsertToken(TEXT("min16float3x1"), EHlslToken::Min16Float3x1); InsertToken(TEXT("min16float4x1"), EHlslToken::Min16Float4x1); InsertToken(TEXT("min16float1x2"), EHlslToken::Min16Float1x2); InsertToken(TEXT("min16float2x2"), EHlslToken::Min16Float2x2); InsertToken(TEXT("min16float3x2"), EHlslToken::Min16Float3x2); InsertToken(TEXT("min16float4x2"), EHlslToken::Min16Float4x2); InsertToken(TEXT("min16float1x3"), EHlslToken::Min16Float1x3); InsertToken(TEXT("min16float2x3"), EHlslToken::Min16Float2x3); InsertToken(TEXT("min16float3x3"), EHlslToken::Min16Float3x3); InsertToken(TEXT("min16float4x3"), EHlslToken::Min16Float4x3); InsertToken(TEXT("min16float1x4"), EHlslToken::Min16Float1x4); InsertToken(TEXT("min16float2x4"), EHlslToken::Min16Float2x4); InsertToken(TEXT("min16float3x4"), EHlslToken::Min16Float3x4); InsertToken(TEXT("min16float4x4"), EHlslToken::Min16Float4x4); InsertToken(TEXT("float"), EHlslToken::Float); InsertToken(TEXT("float1"), EHlslToken::Float1); InsertToken(TEXT("float2"), EHlslToken::Float2); InsertToken(TEXT("float3"), EHlslToken::Float3); InsertToken(TEXT("float4"), EHlslToken::Float4); InsertToken(TEXT("float1x1"), EHlslToken::Float1x1); InsertToken(TEXT("float2x1"), EHlslToken::Float2x1); InsertToken(TEXT("float3x1"), EHlslToken::Float3x1); InsertToken(TEXT("float4x1"), EHlslToken::Float4x1); InsertToken(TEXT("float1x2"), EHlslToken::Float1x2); InsertToken(TEXT("float2x2"), EHlslToken::Float2x2); InsertToken(TEXT("float3x2"), EHlslToken::Float3x2); InsertToken(TEXT("float4x2"), EHlslToken::Float4x2); InsertToken(TEXT("float1x3"), EHlslToken::Float1x3); InsertToken(TEXT("float2x3"), EHlslToken::Float2x3); InsertToken(TEXT("float3x3"), EHlslToken::Float3x3); InsertToken(TEXT("float4x3"), EHlslToken::Float4x3); InsertToken(TEXT("float1x4"), EHlslToken::Float1x4); InsertToken(TEXT("float2x4"), EHlslToken::Float2x4); InsertToken(TEXT("float3x4"), EHlslToken::Float3x4); InsertToken(TEXT("float4x4"), EHlslToken::Float4x4); InsertToken(TEXT("texture"), EHlslToken::Texture); InsertToken(TEXT("Texture1D"), EHlslToken::Texture1D); InsertToken(TEXT("Texture1DArray"), EHlslToken::Texture1DArray); InsertToken(TEXT("Texture1D_Array"), EHlslToken::Texture1DArray); // PSSL InsertToken(TEXT("Texture2D"), EHlslToken::Texture2D); InsertToken(TEXT("Texture2DArray"), EHlslToken::Texture2DArray); InsertToken(TEXT("Texture2D_Array"), EHlslToken::Texture2DArray); // PSSL InsertToken(TEXT("Texture2DMS"), EHlslToken::Texture2DMS); InsertToken(TEXT("MS_Texture2D"), EHlslToken::Texture2DMS); // PSSL InsertToken(TEXT("Texture2DMSArray"), EHlslToken::Texture2DMSArray); InsertToken(TEXT("MS_Texture2D_Array"), EHlslToken::Texture2DMS); // PSSL InsertToken(TEXT("Texture3D"), EHlslToken::Texture3D); InsertToken(TEXT("TextureCube"), EHlslToken::TextureCube); InsertToken(TEXT("TextureCubeArray"), EHlslToken::TextureCubeArray); InsertToken(TEXT("TextureCube_Array"), EHlslToken::TextureCubeArray); // PSSL InsertToken(TEXT("sampler"), EHlslToken::Sampler); InsertToken(TEXT("sampler1D"), EHlslToken::Sampler1D); InsertToken(TEXT("sampler2D"), EHlslToken::Sampler2D); InsertToken(TEXT("sampler3D"), EHlslToken::Sampler3D); InsertToken(TEXT("samplerCUBE"), EHlslToken::SamplerCube); InsertToken(TEXT("SamplerState"), EHlslToken::SamplerState); InsertToken(TEXT("SamplerComparisonState"), EHlslToken::SamplerComparisonState); InsertToken(TEXT("Buffer"), EHlslToken::Buffer); InsertToken(TEXT("DataBuffer"), EHlslToken::Buffer); // PSSL InsertToken(TEXT("AppendStructuredBuffer"), EHlslToken::AppendStructuredBuffer); InsertToken(TEXT("AppendRegularBuffer"), EHlslToken::AppendStructuredBuffer); // PSSL InsertToken(TEXT("ByteAddressBuffer"), EHlslToken::ByteAddressBuffer); InsertToken(TEXT("ByteBuffer"), EHlslToken::ByteAddressBuffer); // PSSL InsertToken(TEXT("ConsumeStructuredBuffer"), EHlslToken::ConsumeStructuredBuffer); InsertToken(TEXT("ConsumeRegularBuffer"), EHlslToken::ConsumeStructuredBuffer); // PSSL InsertToken(TEXT("RWBuffer"), EHlslToken::RWBuffer); InsertToken(TEXT("RW_DataBuffer"), EHlslToken::RWBuffer); // PSSL InsertToken(TEXT("RWByteAddressBuffer"), EHlslToken::RWByteAddressBuffer); InsertToken(TEXT("RW_ByteBuffer"), EHlslToken::RWByteAddressBuffer); // PSSL InsertToken(TEXT("RWStructuredBuffer"), EHlslToken::RWStructuredBuffer); InsertToken(TEXT("RW_RegularBuffer"), EHlslToken::RWStructuredBuffer); // PSSL InsertToken(TEXT("RWTexture1D"), EHlslToken::RWTexture1D); InsertToken(TEXT("RW_Texture1D"), EHlslToken::RWTexture1D); // PSSL InsertToken(TEXT("RWTexture1DArray"), EHlslToken::RWTexture1DArray); InsertToken(TEXT("RW_Texture1D_Array"), EHlslToken::RWTexture1DArray); // PSSL InsertToken(TEXT("RWTexture2D"), EHlslToken::RWTexture2D); InsertToken(TEXT("RW_Texture2D"), EHlslToken::RWTexture2D); // PSSL InsertToken(TEXT("RWTexture2DArray"), EHlslToken::RWTexture2DArray); InsertToken(TEXT("RW_Texture2D_Array"), EHlslToken::RWTexture2DArray); // PSSL InsertToken(TEXT("RasterizerOrderedTexture2D"), EHlslToken::RasterizerOrderedTexture2D); InsertToken(TEXT("RWTexture3D"), EHlslToken::RWTexture3D); InsertToken(TEXT("RW_Texture3D"), EHlslToken::RWTexture3D); // PSSL InsertToken(TEXT("StructuredBuffer"), EHlslToken::StructuredBuffer); InsertToken(TEXT("RegularBuffer"), EHlslToken::StructuredBuffer); // PSSL InsertToken(TEXT("ConstantBuffer"), EHlslToken::ConstantBuffer); InsertToken(TEXT("RaytracingAccelerationStructure"), EHlslToken::RaytracingAccelerationStructure); // Modifiers InsertToken(TEXT("in"), EHlslToken::In); InsertToken(TEXT("out"), EHlslToken::Out); InsertToken(TEXT("inout"), EHlslToken::InOut); InsertToken(TEXT("static"), EHlslToken::Static); InsertToken(TEXT("uniform"), EHlslToken::Uniform); // Misc InsertToken(TEXT("["), EHlslToken::LeftSquareBracket); InsertToken(TEXT("]"), EHlslToken::RightSquareBracket); InsertToken(TEXT("?"), EHlslToken::Question); InsertToken(TEXT("::"), EHlslToken::ColonColon); InsertToken(TEXT(":"), EHlslToken::Colon); InsertToken(TEXT(","), EHlslToken::Comma); InsertToken(TEXT("."), EHlslToken::Dot); InsertToken(TEXT("struct"), EHlslToken::Struct); InsertToken(TEXT("class"), EHlslToken::Struct); // Scan as equivalent to "struct" InsertToken(TEXT("cbuffer"), EHlslToken::CBuffer); InsertToken(TEXT("ConstantBuffer"), EHlslToken::ConstantBuffer); InsertToken(TEXT("groupshared"), EHlslToken::GroupShared); InsertToken(TEXT("row_major"), EHlslToken::RowMajor); InsertToken(TEXT("register"), EHlslToken::Register); InsertToken(TEXT("inline"), EHlslToken::Inline); InsertToken(TEXT("typedef"), EHlslToken::Typedef); InsertToken(TEXT("packoffset"), EHlslToken::PackOffset); InsertToken(TEXT("namespace"), EHlslToken::Namespace); InsertToken(TEXT("operator"), EHlslToken::Operator); InsertToken(TEXT("_Static_assert"), EHlslToken::StaticAssert); // HLSL2021 adopted C11 '_Static_assert'-statements InsertToken(TEXT("static_assert"), EHlslToken::StaticAssert); // Some shader compilers support C++11 'static_assert'-statements InsertToken(TEXT("_Pragma"), EHlslToken::C99Pragma); // C99/C++11 style pragma (distinct from #pragma because argument is a string constant) } } GStaticInitializer; } struct FTokenizer { FString Filename; const TCHAR* Current; const TCHAR* End; const TCHAR* CurrentLineStart; int32 Line; FTokenizer(const FString& InString, const FString& InFilename = TEXT("")) : Filename(InFilename), Current(nullptr), End(nullptr), CurrentLineStart(nullptr), Line(0) { if (InString.Len() > 0) { Current = *InString; End = *InString + InString.Len(); Line = 1; CurrentLineStart = Current; } } bool HasCharsAvailable() const { return Current < End; } void SkipWhitespaceInLine() { while (HasCharsAvailable()) { auto Char = Peek(); if (!IsSpaceOrTab(Char)) { break; } ++Current; } } void SkipWhitespaceAndEmptyLines() { while (HasCharsAvailable()) { SkipWhitespaceInLine(); auto Char = Peek(); if (IsEOL(Char)) { SkipToNextLine(); } else { auto NextChar = Peek(1); if (Char == '/' && NextChar == '/') { // C++ comment Current += 2; this->SkipToNextLine(); continue; } else if (Char == '/' && NextChar == '*') { // C Style comment, eat everything up to * / Current += 2; bool bClosedComment = false; while (HasCharsAvailable()) { if (Peek() == '*') { if (Peek(1) == '/') { bClosedComment = true; Current += 2; break; } } else if (Peek() == '\n') { SkipToNextLine(); // Don't increment current! continue; } ++Current; } //@todo-rco: Error if no closing * / found and we got to EOL //check(bClosedComment); } else { break; } } } } TCHAR Peek() const { if (HasCharsAvailable()) { return *Current; } return 0; } TCHAR Peek(int32 Delta) const { check(Delta > 0); if (Current + Delta < End) { return Current[Delta]; } return 0; } void SkipToNextLine() { while (HasCharsAvailable()) { auto Char = Peek(); ++Current; if (Char == '\r' && Peek() == '\n') { ++Current; break; } else if (Char == '\n') { break; } } ++Line; CurrentLineStart = Current; } bool MatchString(FStringView String) { return MatchString(String.GetData(), String.Len()); } bool MatchString(const TCHAR* Target, int32 TargetLen) { if (Current + TargetLen <= End) { if (FCString::Strncmp(Current, Target, TargetLen) == 0) { Current += TargetLen; return true; } } return false; } bool PeekDigit() const { return IsDigit(Peek()); } bool MatchAndSkipDigits() { auto* Original = Current; while (PeekDigit()) { ++Current; } return Original != Current; } bool Match(TCHAR Char) { if (Char == Peek()) { ++Current; return true; } return false; } inline bool IsSwizzleDigit(TCHAR Char) { switch (Char) { case 'r': case 'g': case 'b': case 'a': case 'x': case 'y': case 'z': case 'w': return true; default: return false; } } bool MatchLiteralFloat(TStringBuilderBase& OutLiteral, ELiteralType& OutType) { auto* Original = Current; TCHAR Char = Peek(); ELiteralType Type = ELiteralType::Float; // \.[0-9]+([eE][+-]?[0-9]+)?[fF]? -> Dot Digits+ Exp? F? // [0-9]+\.([eE][+-]?[0-9]+)?[fF]? -> Digits+ Dot Exp? F? // [0-9]+\.[0-9]+([eE][+-]?[0-9]+)?[fF]? -> Digits+ Dot Digits+ Exp? F? // [0-9]+[eE][+-]?[0-9]+[fF]? -> Digits+ Exp F? // [0-9]+[fF] -> Digits+ F // 1.#INF -> Infinity constant if (!IsDigit(Char) && Char != '.') { return false; } bool bExpOptional = false; // Differentiate between 1. and 1.rr for example if (Char == '.' && IsSwizzleDigit(Peek(1))) { goto NotFloat; } if (Match('.') && MatchAndSkipDigits()) { bExpOptional = true; } else if (MatchAndSkipDigits()) { // Differentiate between 1. and 1.rr for example if (Peek() == '.' && IsSwizzleDigit(Peek(1))) { goto NotFloat; } if (Match('.')) { // Check for infinity constant if (Match('#')) { if (MatchString(MATCH_TARGET(TEXT("INF")))) { goto Done; } else { goto NotFloat; } } bExpOptional = true; MatchAndSkipDigits(); } else { if (IsValidFloatSuffix(Char)) { Type = ELiteralType::FloatSuffix; goto Done; } bExpOptional = false; } } else { goto NotFloat; } { // Exponent [eE][+-]?[0-9]+ bool bExponentFound = false; if (Match('e') || Match('E')) { Char = Peek(); if (Char == '+' || Char == '-') { ++Current; } if (MatchAndSkipDigits()) { bExponentFound = true; } } if (!bExponentFound && !bExpOptional) { goto NotFloat; } } // [fF] Char = Peek(); if (IsValidFloatSuffix(Char)) { ++Current; Type = ELiteralType::FloatSuffix; } Done: OutLiteral.Append(Original, static_cast(Current - Original)); OutType = Type; return true; NotFloat: Current = Original; return false; } bool MatchQuotedString(TStringBuilderBase& OutString) { if (!Match('"')) { return false; } OutString = TEXT(""); while (Peek() != '"') { auto Char = Peek(); OutString += Char; if (Char == 0) { return false; // ill-formed string (EOL inside quote) } if (Char == '\\') // escaped character, we must have at least one more { ++Current; Char = Peek(); if (Char == 0) { return false; // EOL } OutString += Char; //@todo-rco: Should we validate we have escaped a valid character? } ++Current; } if (Match('"')) { return true; } //@todo-rco: Error! check(0); return false; } bool MatchIdentifier(TStringBuilderBase& OutIdentifier) { if (HasCharsAvailable()) { auto Char = Peek(); if (!IsAlpha(Char) && Char != '_') { return false; } ++Current; OutIdentifier = TEXT(""); OutIdentifier += Char; do { Char = Peek(); if (!IsAlphaOrDigit(Char) && Char != '_') { break; } OutIdentifier += Char; ++Current; } while (HasCharsAvailable()); return true; } return false; } bool MatchSymbol(EHlslToken& OutToken, TStringBuilderBase& OutTokenString) { if (HasCharsAvailable()) { if (MatchSymbolToken(Current, &Current, OutToken, &OutTokenString, false)) { return true; } } return false; } static void ProcessDirective(FTokenizer& Tokenizer, FCompilerMessages& CompilerMessages, class FHlslScanner& Scanner); void ReadToEndOfLine(TStringBuilderBase& OutString, bool bSkipToNextLine = true) { const TCHAR* Start = Current; const TCHAR* EndOfLine = Current; while (HasCharsAvailable()) { auto Char = Peek(); if (Char == '\r' && Peek() == '\n') { break; } else if (Char == '\n') { break; } else { EndOfLine = Current; ++Current; } } if (bSkipToNextLine) { SkipToNextLine(); } int32 Count = (int32)(EndOfLine - Start) + 1; OutString.Append(Start, Count); } bool RuleDecimalInteger(TStringBuilderBase& OutValue) { // [1-9][0-9]* auto Char = Peek(); if (Char < '1' || Char > '9') { return false; } OutValue = TEXT(""); ++Current; OutValue += Char; while (HasCharsAvailable()) { Char = Peek(); if (!IsDigit(Char)) { break; } ++Current; OutValue += Char; } return true; } bool RuleDecimalIntegerValue(uint32& OutValue) { TStringBuilder<11> String; // large enough for uint32 max + terminator if (RuleDecimalInteger(String)) { OutValue = (uint32)FCString::Atoi(*String); return true; } return false; } bool RuleOctalInteger(TStringBuilderBase& OutValue) { // 0[0-7]* auto Char = Peek(); if (Char != '0') { return false; } OutValue = TEXT("0"); ++Current; while (HasCharsAvailable()) { Char = Peek(); if (Char >= '0' && Char <= '7') { OutValue += Char; } else { break; } ++Current; } return true; } bool RuleHexadecimalInteger(TStringBuilderBase& OutValue) { // 0[xX][0-9a-zA-Z]+ auto Char = Peek(); auto Char1 = Peek(1); auto Char2 = Peek(2); if (Char == '0' && (Char1 == 'x' || Char1 == 'X') && IsHexDigit(Char2)) { Current += 2; OutValue = TEXT("0"); OutValue += Char1; do { Char = Peek(); if (IsDigit(Char) || (Char >= 'a' && Char <= 'f') || (Char >= 'A' && Char <= 'F')) { OutValue += Char; } else { break; } ++Current; } while (HasCharsAvailable()); return true; } return false; } static bool IsValidIntegerSuffix(TCHAR Char) { switch (Char) { case (TCHAR)'u': case (TCHAR)'U': case (TCHAR)'l': case (TCHAR)'L': return true; default: break; } return false; } static bool IsValidFloatSuffix(TCHAR Char) { switch (Char) { case (TCHAR)'F': case (TCHAR)'f': return true; default: break; } return false; } bool MatchLiteral(TStringBuilderBase& OutValue, ELiteralType& OutLiteralType) { if (MatchLiteralFloat(OutValue, OutLiteralType)) { return true; } else { return MatchLiteralInteger(OutValue, OutLiteralType); } } bool MatchLiteralInteger(TStringBuilderBase& OutValue, ELiteralType& OutLiteralType) { if (RuleHexadecimalInteger(OutValue)) { auto Char = Peek(); if (IsValidIntegerSuffix(Char)) { ++Current; OutValue += Char; OutLiteralType = ELiteralType::HexSuffix; } else { OutLiteralType = ELiteralType::Hex; } return true; } else if (RuleOctalInteger(OutValue)) { if (OutValue.Len() == 1 && OutValue.LastChar() == (TCHAR)'0') { auto Char = Peek(); if (IsValidIntegerSuffix(Char)) { ++Current; OutValue += Char; OutLiteralType = ELiteralType::IntegerSuffix; } else { OutLiteralType = ELiteralType::Integer; } } else { OutLiteralType = ELiteralType::Octal; } return true; } else if (RuleDecimalInteger(OutValue)) { auto Char = Peek(); if (IsValidIntegerSuffix(Char)) { ++Current; OutValue += Char; OutLiteralType = ELiteralType::IntegerSuffix; } else { OutLiteralType = ELiteralType::Integer; } return true; } return false; } }; FHlslScanner::FHlslScanner(FCompilerMessages& InCompilerMessages) : CompilerMessages(InCompilerMessages), CurrentToken(0) { } FHlslScanner::~FHlslScanner() { } inline void FHlslScanner::EmplaceToken(FHlslToken&& Token, const FTokenizer& Tokenizer) { int32 TokenIndex = Tokens.Emplace(MoveTemp(Token)); Tokens[TokenIndex].SourceInfo.Filename = &SourceFilenames.Last(); Tokens[TokenIndex].SourceInfo.Line = Tokenizer.Line; Tokens[TokenIndex].SourceInfo.Column = (int32)(Tokenizer.Current - Tokenizer.CurrentLineStart) + 1; } void FHlslScanner::Clear(const FString& Filename) { Tokens.Empty(); SourceFilenames.Add(new FString(Filename)); } bool FHlslScanner::Lex(const FString& String, const FString& Filename) { Clear(Filename); // Simple heuristic to avoid reallocating Tokens.Reserve(String.Len() / 8); FTokenizer Tokenizer(String, Filename); while (Tokenizer.HasCharsAvailable()) { auto* Sanity = Tokenizer.Current; Tokenizer.SkipWhitespaceAndEmptyLines(); if (Tokenizer.Peek() == '#') { FTokenizer::ProcessDirective(Tokenizer, CompilerMessages, *this); if (Tokenizer.Filename != SourceFilenames.Last()) { SourceFilenames.Add(new FString(Tokenizer.Filename)); } } else { TStringBuilder<128> Identifier; EHlslToken SymbolToken; ELiteralType LiteralType = ELiteralType::Unknown; if (Tokenizer.MatchLiteral(Identifier, LiteralType)) { EmplaceToken(FHlslToken(Identifier, LiteralType), Tokenizer); } else if (Tokenizer.MatchIdentifier(Identifier)) { if (!FCString::Strcmp(*Identifier, TEXT("true")) || !FCString::Strcmp(*Identifier, TEXT("false"))) { EmplaceToken(FHlslToken(*Identifier, ELiteralType::Bool), Tokenizer); } else if (MatchSymbolToken(*Identifier, nullptr, SymbolToken, nullptr, true)) { EmplaceToken(FHlslToken(SymbolToken, Identifier), Tokenizer); } else { EmplaceToken(FHlslToken(Identifier), Tokenizer); } } else if (Tokenizer.MatchSymbol(SymbolToken, Identifier)) { EmplaceToken(FHlslToken(SymbolToken, Identifier), Tokenizer); } else if (Tokenizer.MatchQuotedString(Identifier)) { EmplaceToken(FHlslToken(EHlslToken::StringConstant, Identifier), Tokenizer); } else if (Tokenizer.HasCharsAvailable()) { //@todo-rco: Unknown token! if (Tokenizer.Filename.Len() > 0) { CompilerMessages.SourceError(*FString::Printf(TEXT("Unknown token at line %d, file '%s'!"), Tokenizer.Line, *Tokenizer.Filename)); } else { CompilerMessages.SourceError(*FString::Printf(TEXT("Unknown token at line %d!"), Tokenizer.Line)); } return false; } } check(Sanity != Tokenizer.Current); } return true; } void FHlslScanner::Dump() { for (int32 Index = 0; Index < Tokens.Num(); ++Index) { auto& Token = Tokens[Index]; switch (Token.Token) { case EHlslToken::Literal: FPlatformMisc::LowLevelOutputDebugStringf(TEXT("** %d: Literal Type %d '%s'\n"), Index, (int32)Token.LiteralType, *Token.String); break; default: FPlatformMisc::LowLevelOutputDebugStringf(TEXT("** %d: %d '%s'\n"), Index, Token.Token, *Token.String); break; } } } bool FHlslScanner::MatchToken(EHlslToken InToken) { const auto* Token = GetCurrentToken(); if (Token) { if (Token->Token == InToken) { ++CurrentToken; return true; } } return false; } bool FHlslScanner::MatchIntegerLiteral() { const auto* Token = GetCurrentToken(); if (Token) { if (Token->Token == EHlslToken::Literal) { if (IsIntegerType(Token->LiteralType)) { ++CurrentToken; return true; } } } return false; } const FHlslToken* FHlslScanner::PeekToken(uint32 LookAhead /*= 0*/) const { if (CurrentToken + LookAhead < (uint32)Tokens.Num()) { return &Tokens[CurrentToken + LookAhead]; } return nullptr; } bool FHlslScanner::HasMoreTokens() const { return CurrentToken < (uint32)Tokens.Num(); } const FHlslToken* FHlslScanner::GetCurrentToken() const { if (CurrentToken < (uint32)Tokens.Num()) { return &Tokens[CurrentToken]; } return nullptr; } const FHlslToken* FHlslScanner::GetCurrentTokenAndAdvance() { if (CurrentToken < (uint32)Tokens.Num()) { auto* Return = &Tokens[CurrentToken]; Advance(); } return nullptr; } void FHlslScanner::SetCurrentTokenIndex(uint32 NewToken) { check(NewToken <= (uint32)Tokens.Num()); CurrentToken = NewToken; } void FHlslScanner::SourceError(const FString& Error) { if (CurrentToken < (uint32)Tokens.Num()) { const auto& Token = Tokens[CurrentToken]; check(Token.SourceInfo.Filename); CompilerMessages.SourceError(Token.SourceInfo, *Error); } else { CompilerMessages.SourceError(*Error); } } void FTokenizer::ProcessDirective(FTokenizer& Tokenizer, FCompilerMessages& CompilerMessages, FHlslScanner& Scanner) { check(Tokenizer.Peek() == '#'); if (Tokenizer.MatchString(MATCH_TARGET(TEXT("#line")))) { Tokenizer.SkipWhitespaceInLine(); uint32 Line = 0; if (Tokenizer.RuleDecimalIntegerValue(Line)) { Tokenizer.Line = Line - 1; Tokenizer.SkipWhitespaceInLine(); TStringBuilder<256> Filename; if (Tokenizer.MatchQuotedString(Filename)) { Tokenizer.Filename = Filename; } } else { TStringBuilder<128> LineString; LineString += TEXTVIEW("#line "); Tokenizer.ReadToEndOfLine(LineString); CompilerMessages.SourceError(*FString::Printf(TEXT("Malformed #line directive: %s!"), *LineString)); } } else if (Tokenizer.MatchString(MATCH_TARGET(TEXT("#pragma")))) { TStringBuilder<128> Pragma; Pragma += TEXTVIEW("#pragma"); Tokenizer.ReadToEndOfLine(Pragma, false); Scanner.EmplaceToken(FHlslToken(EHlslToken::Pragma, Pragma), Tokenizer); } else if (Tokenizer.MatchString(MATCH_TARGET(TEXT("#if 0")))) { if (Tokenizer.Peek() == ' ' || Tokenizer.Peek() == '\n') { Tokenizer.SkipToNextLine(); while (Tokenizer.HasCharsAvailable() && Tokenizer.Peek() != '#') { Tokenizer.SkipToNextLine(); } if (Tokenizer.MatchString(MATCH_TARGET(TEXT("#endif")))) { // Nothing here, skip to next line will happen later } else { CompilerMessages.SourceWarning(*FString::Printf(TEXT("Expected #endif preprocessor directive; HlslParser requires preprocessed input!"))); } } else { TStringBuilder<128> Directive; Directive += TEXT("#if 0"); Tokenizer.ReadToEndOfLine(Directive); CompilerMessages.SourceWarning(*FString::Printf(TEXT("Unhandled preprocessor directive (%.500s); HlslParser requires preprocessed input!"), *Directive)); } } else { TStringBuilder<128> Directive; Tokenizer.ReadToEndOfLine(Directive); CompilerMessages.SourceWarning(*FString::Printf(TEXT("Unhandled token (%.500s); HlslParser requires preprocessed input!"), *Directive)); } Tokenizer.SkipToNextLine(); } }