948 lines
32 KiB
C++
948 lines
32 KiB
C++
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#include "MetalCompileShaderMSC.h"
|
|
#include "MetalShaderCompiler.h"
|
|
|
|
#include "MetalShaderResources.h"
|
|
#include "Misc/FileHelper.h"
|
|
#include "Misc/Paths.h"
|
|
#include "Misc/Compression.h"
|
|
#include "Misc/OutputDeviceRedirector.h"
|
|
#include "MetalBackend.h"
|
|
#include "RHIDefinitions.h"
|
|
#include "Serialization/MemoryReader.h"
|
|
#include "Serialization/MemoryWriter.h"
|
|
#include "ShaderCompilerDefinitions.h"
|
|
#include "SpirvReflectCommon.h"
|
|
#include "ShaderParameterParser.h"
|
|
#include "Containers/AnsiString.h"
|
|
|
|
#include <regex>
|
|
|
|
#if UE_METAL_USE_METAL_SHADER_CONVERTER
|
|
|
|
#if PLATFORM_MAC || PLATFORM_WINDOWS
|
|
THIRD_PARTY_INCLUDES_START
|
|
#include "metal_irconverter.h"
|
|
THIRD_PARTY_INCLUDES_END
|
|
|
|
// Forward declaration only: the definition is not part of this file.
// Called at the end of FMetalCompileShaderMSC::DoCompileMetalShader.
extern void BuildMetalShaderOutput(
	FShaderCompilerOutput& ShaderOutput, const FShaderCompilerInput& ShaderInput,
	const ANSICHAR* InShaderSource, uint32 SourceLen,
	uint32 SourceCRCLen, uint32 SourceCRC,
	uint32 Version, TCHAR const* Standard, TCHAR const* MinOSVersion,
	TArray<FShaderCompilerError>& OutErrors,
	uint32 TypedBuffers, uint32 InvariantBuffers, uint32 TypedUAVs, uint32 ConstantBuffers,
	bool bAllowFastIntriniscs,
	uint32 NumCBVs, uint32 OutputSizeVS, uint32 MaxInputPrimitivesPerMeshThreadgroupGS,
	const bool bUsesDiscard,
	char const* ShaderReflectionJSON,
	FMetalShaderBytecode const& CompiledShaderBytecode
);
|
|
|
|
#include "ShaderConductorContext.h"
|
|
|
|
#if PLATFORM_WINDOWS
|
|
#include "Windows/AllowWindowsPlatformTypes.h"
|
|
#endif
|
|
#include "d3d12shader.h"
|
|
#include "dxc/dxcapi.h"
|
|
#if PLATFORM_WINDOWS
|
|
#include <dxc/Support/dxcapi.use.h>
|
|
#include "Windows/HideWindowsPlatformTypes.h"
|
|
#endif
|
|
|
|
/**
 * Maps an engine shader frequency to the matching Metal Shader Converter stage.
 * Unhandled frequencies hit checkNoEntry() and yield IRShaderStageInvalid.
 */
inline IRShaderStage ShaderFrequencyToStage(const EShaderFrequency UEStage)
{
	IRShaderStage Stage = IRShaderStageInvalid;

	switch (UEStage)
	{
	case SF_Vertex:
		Stage = IRShaderStageVertex;
		break;
	case SF_Mesh:
		Stage = IRShaderStageMesh;
		break;
	case SF_Amplification:
		Stage = IRShaderStageAmplification;
		break;
	case SF_Pixel:
		Stage = IRShaderStageFragment;
		break;
	case SF_Geometry:
		Stage = IRShaderStageGeometry;
		break;
	case SF_Compute:
		Stage = IRShaderStageCompute;
		break;
	case SF_RayGen:
		Stage = IRShaderStageRayGeneration;
		break;
	case SF_RayMiss:
		Stage = IRShaderStageMiss;
		break;
	case SF_RayHitGroup:
		// TODO: How to distinguish AnyHit/ClosestHit/etc.?
		Stage = IRShaderStageAnyHit;
		break;
	case SF_RayCallable:
		Stage = IRShaderStageCallable;
		break;
	default:
		checkNoEntry();
		break;
	}

	return Stage;
}
|
|
|
|
/**
 * Maps an engine shader frequency to the root-parameter visibility used for it.
 * Graphics stages get their dedicated visibility; compute and ray tracing stages
 * use IRShaderVisibilityAll. Unhandled frequencies hit checkNoEntry() and also
 * fall back to IRShaderVisibilityAll.
 */
inline IRShaderVisibility ShaderFrequencyToVisibility(const EShaderFrequency UEStage)
{
	IRShaderVisibility Visibility = IRShaderVisibilityAll;

	switch (UEStage)
	{
	case SF_Vertex:
		Visibility = IRShaderVisibilityVertex;
		break;
	case SF_Mesh:
		Visibility = IRShaderVisibilityMesh;
		break;
	case SF_Amplification:
		Visibility = IRShaderVisibilityAmplification;
		break;
	case SF_Pixel:
		Visibility = IRShaderVisibilityPixel;
		break;
	case SF_Geometry:
		Visibility = IRShaderVisibilityGeometry;
		break;
	case SF_Compute:
	case SF_RayGen:
	case SF_RayMiss:
	case SF_RayHitGroup:
	case SF_RayCallable:
		// Already IRShaderVisibilityAll.
		break;
	default:
		checkNoEntry();
		break;
	}

	return Visibility;
}
|
|
|
|
/**
 * Collapses a D3D shader input type into one of the four converter resource
 * classes (CBV, SRV, UAV, sampler). Unknown types hit checkNoEntry() and
 * yield IRResourceTypeInvalid.
 */
inline IRResourceType QuantizeD3DResourceType(const D3D_SHADER_INPUT_TYPE Type)
{
	switch (Type)
	{
	// Constant-buffer style inputs.
	case D3D_SIT_CBUFFER:
	case D3D_SIT_TBUFFER:
		return IRResourceTypeCBV;

	// Read-only inputs.
	case D3D_SIT_TEXTURE:
	case D3D_SIT_STRUCTURED:
	case D3D_SIT_BYTEADDRESS:
	case D3D_SIT_RTACCELERATIONSTRUCTURE:
		return IRResourceTypeSRV;

	case D3D_SIT_SAMPLER:
		return IRResourceTypeSampler;

	// Unordered-access inputs.
	case D3D_SIT_UAV_RWTYPED:
	case D3D_SIT_UAV_RWSTRUCTURED:
	case D3D_SIT_UAV_RWBYTEADDRESS:
	case D3D_SIT_UAV_APPEND_STRUCTURED:
	case D3D_SIT_UAV_CONSUME_STRUCTURED:
	case D3D_SIT_UAV_RWSTRUCTURED_WITH_COUNTER:
	case D3D_SIT_UAV_FEEDBACKTEXTURE:
		return IRResourceTypeUAV;

	default:
		checkNoEntry();
		break;
	}

	return IRResourceTypeInvalid;
}
|
|
|
|
inline bool IsD3DResourceTypeTyped(const D3D_SHADER_INPUT_TYPE Type)
|
|
{
|
|
return Type == D3D_SIT_TBUFFER || Type == D3D_SIT_UAV_RWTYPED;
|
|
}
|
|
|
|
template<IRDescriptorRangeType DescriptorType>
|
|
static IRDescriptorRange1 CreateDescriptorRange(const uint32 NumDescriptors)
|
|
{
|
|
IRDescriptorRange1 DescRange;
|
|
DescRange.RangeType = DescriptorType;
|
|
DescRange.NumDescriptors = NumDescriptors;
|
|
DescRange.BaseShaderRegister = 0;
|
|
DescRange.RegisterSpace = 0;
|
|
DescRange.OffsetInDescriptorsFromTableStart = IRDescriptorRangeOffsetAppend;
|
|
|
|
switch (DescriptorType)
|
|
{
|
|
case IRDescriptorRangeTypeCBV:
|
|
case IRDescriptorRangeTypeSRV:
|
|
DescRange.Flags = IRDescriptorRangeFlagDataStaticWhileSetAtExecute;
|
|
break;
|
|
case IRDescriptorRangeTypeUAV:
|
|
DescRange.Flags = IRDescriptorRangeFlagDataVolatile;
|
|
break;
|
|
case IRDescriptorRangeTypeSampler:
|
|
DescRange.Flags = IRDescriptorRangeFlagNone;
|
|
break;
|
|
default:
|
|
checkNoEntry();
|
|
break;
|
|
}
|
|
|
|
return DescRange;
|
|
}
|
|
|
|
/**
 * Walks the resource bindings reported by the DXIL reflection and records them.
 *
 * For every bound resource this writes the matching entry into the cross-compiler
 * header, sets the bind-point bit in the relevant OutputData mask and, for constant
 * buffers, forwards the used members to the HandleReflected* helpers.
 *
 * @param ShaderReflection Reflection interface for the compiled DXIL.
 * @param BoundResources   Number of resource bindings to query from ShaderReflection.
 * @param Input            Compiler input, used to decide which uniform buffers have their members reflected.
 * @param Output           Compiler output passed on to the HandleReflected* helpers.
 * @param CCHeaderWriter   Receives the SRV/UAV/sampler/uniform-block header entries.
 * @param OutputData       Receives the TypedBuffers/InvariantBuffers/TypedUAVs/ConstantBuffers bitmasks.
 * @param NumCBVs          Out: one past the highest constant buffer register in use (0 if there is none).
 */
static void ProcessReflection(ID3D12ShaderReflection* ShaderReflection, const uint32 BoundResources, const FShaderCompilerInput& Input, FShaderCompilerOutput& Output, CrossCompiler::FHlslccHeaderWriter& CCHeaderWriter, FMetalShaderOutputMetaData& OutputData, uint32& NumCBVs)
{
	NumCBVs = 0;

	uint32 NumSRVs = 0;
	uint32 NumUAVs = 0;
	uint32 NumSamplers = 0;

	// Build output metadata and collect infos for each resource type ranges.
	for (uint32 ResourceIndex = 0; ResourceIndex < BoundResources; ResourceIndex++)
	{
		D3D12_SHADER_INPUT_BIND_DESC BindDesc;
		ShaderReflection->GetResourceBindingDesc(ResourceIndex, &BindDesc);

		const IRResourceType ResourceType = QuantizeD3DResourceType(BindDesc.Type);
		const bool bIsResourceTyped = IsD3DResourceTypeTyped(BindDesc.Type);
		const uint32 BindIndex = BindDesc.BindPoint;

		switch (ResourceType)
		{
		case IRResourceTypeSRV:
			// The masks are built from an unsigned literal so that bit 31 is well defined.
			if (bIsResourceTyped)
			{
				OutputData.TypedBuffers |= (1u << BindIndex);
			}
			else
			{
				OutputData.InvariantBuffers |= (1u << BindIndex);
			}

			CCHeaderWriter.WriteSRV(ANSI_TO_TCHAR(BindDesc.Name), BindIndex, BindDesc.BindCount);
			NumSRVs = FMath::Max(NumSRVs, BindIndex + BindDesc.BindCount);
			break;

		case IRResourceTypeUAV:
			if (bIsResourceTyped)
			{
				OutputData.TypedUAVs |= (1u << BindIndex);
				OutputData.TypedBuffers |= (1u << BindIndex);
			}
			else
			{
				OutputData.InvariantBuffers |= (1u << BindIndex);
			}

			CCHeaderWriter.WriteUAV(ANSI_TO_TCHAR(BindDesc.Name), BindIndex, BindDesc.BindCount);
			NumUAVs = FMath::Max(NumUAVs, BindIndex + BindDesc.BindCount);
			break;

		case IRResourceTypeSampler:
			CCHeaderWriter.WriteSamplerState(ANSI_TO_TCHAR(BindDesc.Name), BindIndex);
			NumSamplers = FMath::Max(NumSamplers, BindIndex + BindDesc.BindCount);
			break;

		case IRResourceTypeCBV:
			{
				const bool bIsGlobalCB = (FCStringAnsi::Strcmp(BindDesc.Name, "$Globals") == 0);
				const bool bIsRootCB = FCString::Strcmp(ANSI_TO_TCHAR(BindDesc.Name), FShaderParametersMetadata::kRootUniformBufferBindingName) == 0;

				OutputData.ConstantBuffers |= (1u << BindIndex);

				// Both kinds of constant buffer need the buffer description to enumerate members.
				ID3D12ShaderReflectionConstantBuffer* ConstantBuffer = ShaderReflection->GetConstantBufferByName(BindDesc.Name);

				D3D12_SHADER_BUFFER_DESC CBDesc;
				ConstantBuffer->GetDesc(&CBDesc);

				// Global uniform buffer - handled specially as we care about the internal layout
				if (bIsGlobalCB || bIsRootCB)
				{
					TCBDMARangeMap CBRanges;
					CCHeaderWriter.WritePackedUB(BindIndex);

					// Track all of the variables in this constant buffer.
					for (uint32 ConstantIndex = 0; ConstantIndex < CBDesc.Variables; ConstantIndex++)
					{
						ID3D12ShaderReflectionVariable* Variable = ConstantBuffer->GetVariableByIndex(ConstantIndex);

						D3D12_SHADER_VARIABLE_DESC VariableDesc;
						Variable->GetDesc(&VariableDesc);

						if (VariableDesc.uFlags & D3D_SVF_USED)
						{
							CCHeaderWriter.WritePackedUBField(ANSI_TO_TCHAR(VariableDesc.Name), VariableDesc.StartOffset, VariableDesc.Size);

							// Offsets and sizes are expressed in float-sized units for the range map.
							const uint32 MbrOffset = VariableDesc.StartOffset / sizeof(float);
							const uint32 MbrSize = VariableDesc.Size / sizeof(float);
							unsigned DestCBPrecision = TEXT('h');
							unsigned SourceOffset = MbrOffset;
							unsigned DestOffset = MbrOffset;
							unsigned DestSize = MbrSize;
							unsigned DestCBIndex = 0;
							InsertRange(CBRanges, BindIndex, SourceOffset, DestSize, DestCBIndex, DestCBPrecision, DestOffset);

							HandleReflectedGlobalConstantBufferMember(
								FString(VariableDesc.Name),
								BindIndex,
								VariableDesc.StartOffset,
								VariableDesc.Size,
								Output);
						}
					}
				}
				else
				{
					const FString UniformBufferName(BindDesc.Name);
					const EUniformBufferMemberReflectionReason Reason = ShouldReflectUniformBufferMembers(Input, UniformBufferName);
					if (Reason != EUniformBufferMemberReflectionReason::None)
					{
						for (uint32 ConstantIndex = 0; ConstantIndex < CBDesc.Variables; ConstantIndex++)
						{
							ID3D12ShaderReflectionVariable* Variable = ConstantBuffer->GetVariableByIndex(ConstantIndex);

							D3D12_SHADER_VARIABLE_DESC VariableDesc;
							Variable->GetDesc(&VariableDesc);

							if (VariableDesc.uFlags & D3D_SVF_USED)
							{
								HandleReflectedUniformBufferConstantBufferMember(
									Reason,
									UniformBufferName,
									BindIndex,
									FString(VariableDesc.Name),
									VariableDesc.StartOffset,
									VariableDesc.Size,
									Output
								);
							}
						}
					}

					// Regular uniform buffer - we only care about the binding index
					CCHeaderWriter.WriteUniformBlock(*UniformBufferName, BindIndex);
					HandleReflectedUniformBuffer(UniformBufferName, BindIndex, Output);
				}

				NumCBVs = FMath::Max(NumCBVs, BindIndex + BindDesc.BindCount);
			}
			break;

		default:
			checkNoEntry();
			break;
		}
	}

	// DXIL fetches resources from the resources heaps.
	check(NumSRVs == 0 && NumUAVs == 0 && NumSamplers == 0);
}
|
|
|
|
#if PLATFORM_WINDOWS
|
|
/**
 * Returns the process-wide DXC DLL helper, initializing it on first use.
 * An initialization failure is logged; the helper is still returned, so callers
 * must check the result of the instance-creation calls they make through it.
 */
static dxc::DxcDllSupport& GetDxcDllHelper()
{
	struct DxcDllHelper
	{
		DxcDllHelper()
		{
			const HRESULT Result = DxcDllSupport.Initialize();
			if (FAILED(Result))
			{
				// Previously ignored: report it so later instance-creation failures can be traced back here.
				UE_LOG(LogShaders, Warning, TEXT("Failed to initialize the DXC DLL support (Initialize returned 0x%08x)"), static_cast<uint32>(Result));
			}
		}

		dxc::DxcDllSupport DxcDllSupport;
	};

	static DxcDllHelper DllHelper;
	return DllHelper.DxcDllSupport;
}
|
|
|
|
class FDxcMalloc final : public IMalloc
|
|
{
|
|
std::atomic<ULONG> RefCount{ 1 };
|
|
|
|
public:
|
|
|
|
// IMalloc
|
|
|
|
void* STDCALL Alloc(SIZE_T cb) override
|
|
{
|
|
cb = FMath::Max(SIZE_T(1), cb);
|
|
return FMemory::Malloc(cb);
|
|
}
|
|
|
|
void* STDCALL Realloc(void* pv, SIZE_T cb) override
|
|
{
|
|
cb = FMath::Max(SIZE_T(1), cb);
|
|
return FMemory::Realloc(pv, cb);
|
|
}
|
|
|
|
void STDCALL Free(void* pv) override
|
|
{
|
|
return FMemory::Free(pv);
|
|
}
|
|
|
|
SIZE_T STDCALL GetSize(void* pv) override
|
|
{
|
|
return FMemory::GetAllocSize(pv);
|
|
}
|
|
|
|
int STDCALL DidAlloc(void* pv) override
|
|
{
|
|
return 1; // assume that all allocation queries coming from DXC belong to our allocator
|
|
}
|
|
|
|
void STDCALL HeapMinimize() override
|
|
{
|
|
// nothing
|
|
}
|
|
|
|
// IUnknown
|
|
|
|
ULONG STDCALL AddRef() override
|
|
{
|
|
return ++RefCount;
|
|
}
|
|
|
|
ULONG STDCALL Release() override
|
|
{
|
|
check(RefCount > 0);
|
|
return --RefCount;
|
|
}
|
|
|
|
HRESULT STDCALL QueryInterface(REFIID iid, void** ppvObject) override
|
|
{
|
|
checkNoEntry(); // We do not expect or support QI on DXC allocator replacement
|
|
return ERROR_NOINTERFACE;
|
|
}
|
|
};
|
|
|
|
/** Returns the single FDxcMalloc instance, created on first call. */
static IMalloc* GetDxcMalloc()
{
	static FDxcMalloc DxcAllocator;
	IMalloc* const AllocatorInterface = &DxcAllocator;
	return AllocatorInterface;
}
|
|
#endif // PLATFORM_WINDOWS
|
|
|
|
static bool ReflectDXILAndBuildDescriptorRanges(const TArray<uint32>& DXILReflection, const FShaderCompilerInput& Input, FShaderCompilerOutput& Output, CrossCompiler::FHlslccHeaderWriter& CCHeaderWriter, FMetalShaderOutputMetaData& OutputData, uint32& NumCBVs, uint32& NumInstructions)
|
|
{
|
|
// Reflect DXIL
|
|
TRefCountPtr<IDxcUtils> Utils;
|
|
#if PLATFORM_MAC
|
|
HRESULT Result = DxcCreateInstance(CLSID_DxcUtils, IID_PPV_ARGS(Utils.GetInitReference()));
|
|
#elif PLATFORM_WINDOWS
|
|
dxc::DxcDllSupport& DxcDllHelper = GetDxcDllHelper();
|
|
HRESULT Result = DxcDllHelper.CreateInstance2(GetDxcMalloc(), CLSID_DxcUtils, Utils.GetInitReference());
|
|
#endif
|
|
if (!SUCCEEDED(Result))
|
|
{
|
|
UE_LOG(LogShaders, Warning, TEXT("Failed to create DxcUtils"));
|
|
return false;
|
|
}
|
|
|
|
DxcBuffer ReflBuffer = {0};
|
|
ReflBuffer.Ptr = DXILReflection.GetData();
|
|
ReflBuffer.Size = DXILReflection.Num() * sizeof(uint32_t);
|
|
|
|
// Stolen from D3DShaderCompilerDXC (do we really need this for Metal?)
|
|
uint32 ShaderRequiresFlags = 0;
|
|
|
|
if (!Input.IsRayTracingShader())
|
|
{
|
|
TRefCountPtr<ID3D12ShaderReflection> ShaderReflection;
|
|
Result = Utils->CreateReflection(&ReflBuffer, IID_PPV_ARGS(ShaderReflection.GetInitReference()));
|
|
if (!SUCCEEDED(Result))
|
|
{
|
|
UE_LOG(LogShaders, Warning, TEXT("Failed to create shader reflection (CreateReflection returned 0x%x)"), Result);
|
|
return false;
|
|
}
|
|
|
|
D3D12_SHADER_DESC ShaderDesc = {};
|
|
ShaderReflection->GetDesc(&ShaderDesc);
|
|
|
|
NumInstructions = ShaderDesc.InstructionCount;
|
|
|
|
// Return a fraction of the number of instructions as DXIL is more verbose than DXBC.
|
|
// Ratio 119:307 was estimated by gathering average instruction count for D3D11 and D3D12 shaders in ShooterGame with result being ~ 357:921.
|
|
constexpr uint32 DxbcToDxilInstructionRatio[2] = { 119, 307 };
|
|
NumInstructions = NumInstructions * DxbcToDxilInstructionRatio[0] / DxbcToDxilInstructionRatio[1];
|
|
|
|
ProcessReflection(ShaderReflection.GetReference(), ShaderDesc.BoundResources, Input, Output, CCHeaderWriter, OutputData, NumCBVs);
|
|
|
|
// Vertex Input
|
|
for (uint32 InputIndex = 0; InputIndex < ShaderDesc.InputParameters; InputIndex++)
|
|
{
|
|
D3D12_SIGNATURE_PARAMETER_DESC SignatureParamDesc;
|
|
ShaderReflection->GetInputParameterDesc(InputIndex, &SignatureParamDesc);
|
|
|
|
FString TypeQualifier;
|
|
switch (SignatureParamDesc.ComponentType)
|
|
{
|
|
case D3D_REGISTER_COMPONENT_UINT32:
|
|
TypeQualifier = TEXT("u");
|
|
break;
|
|
case D3D_REGISTER_COMPONENT_SINT32:
|
|
TypeQualifier = TEXT("i");
|
|
break;
|
|
case D3D_REGISTER_COMPONENT_FLOAT32:
|
|
TypeQualifier = TEXT("f");
|
|
break;
|
|
case D3D_REGISTER_COMPONENT_UNKNOWN:
|
|
default:
|
|
checkNoEntry();
|
|
break;
|
|
}
|
|
|
|
CCHeaderWriter.WriteInputAttribute(TEXT("in_ATTRIBUTE"), *TypeQualifier, SignatureParamDesc.SemanticIndex, /*bLocationPrefix:*/ false, /*bLocationSuffix:*/ true);
|
|
}
|
|
|
|
// Pixel Output
|
|
for (uint32 OutputIndex = 0; OutputIndex < ShaderDesc.OutputParameters; OutputIndex++)
|
|
{
|
|
D3D12_SIGNATURE_PARAMETER_DESC SignatureParamDesc;
|
|
ShaderReflection->GetOutputParameterDesc(OutputIndex, &SignatureParamDesc);
|
|
|
|
FString TypeQualifier;
|
|
switch (SignatureParamDesc.ComponentType)
|
|
{
|
|
case D3D_REGISTER_COMPONENT_UINT32:
|
|
TypeQualifier = TEXT("u");
|
|
break;
|
|
case D3D_REGISTER_COMPONENT_SINT32:
|
|
TypeQualifier = TEXT("i");
|
|
break;
|
|
case D3D_REGISTER_COMPONENT_FLOAT32:
|
|
TypeQualifier = TEXT("f");
|
|
break;
|
|
case D3D_REGISTER_COMPONENT_UNKNOWN:
|
|
default:
|
|
checkNoEntry();
|
|
break;
|
|
}
|
|
|
|
FString SemanticName = SignatureParamDesc.SemanticName;
|
|
CCHeaderWriter.WriteOutputAttribute(*SemanticName, *TypeQualifier, SignatureParamDesc.SemanticIndex, /*bLocationPrefix:*/ false, /*bLocationSuffix:*/ true);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
check (false);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
struct FMetalShaderParameterParserPlatformConfiguration : public FShaderParameterParser::FPlatformConfiguration
|
|
{
|
|
FMetalShaderParameterParserPlatformConfiguration()
|
|
: FShaderParameterParser::FPlatformConfiguration(TEXTVIEW("cbuffer"), EShaderParameterParserConfigurationFlags::UseStableConstantBuffer|EShaderParameterParserConfigurationFlags::SupportsBindless)
|
|
{
|
|
}
|
|
|
|
virtual FString GenerateBindlessAccess(EBindlessConversionType BindlessType, FStringView FullTypeString, FStringView ArrayNameOverride, FStringView IndexString) const final
|
|
{
|
|
// GetSRVFromHeap(Type, Index) ResourceDescriptorHeap[Index]
|
|
// GetUAVFromHeap(Type, Index) ResourceDescriptorHeap[Index]
|
|
// GetSamplerFromHeap(Type, Index) SamplerDescriptorHeap[Index]
|
|
|
|
const TCHAR* HeapString = BindlessType == EBindlessConversionType::Sampler ? TEXT("SamplerDescriptorHeap") : TEXT("ResourceDescriptorHeap");
|
|
|
|
return FString::Printf(TEXT("%s[%.*s]"),
|
|
HeapString,
|
|
IndexString.Len(), IndexString.GetData()
|
|
);
|
|
}
|
|
};
|
|
|
|
void FMetalCompileShaderMSC::DoCompileMetalShader(
|
|
const FShaderCompilerInput& Input,
|
|
FShaderCompilerOutput& Output,
|
|
const FString& InPreprocessedShader,
|
|
uint32 VersionEnum,
|
|
EMetalGPUSemantics Semantics,
|
|
uint32 MaxUnrollLoops,
|
|
EShaderFrequency Frequency,
|
|
bool bDumpDebugInfo,
|
|
const FString& Standard,
|
|
const FString& MinOSVersion)
|
|
{
|
|
int32 IABTier = VersionEnum >= 4 ? Input.Environment.GetCompileArgument(TEXT("METAL_INDIRECT_ARGUMENT_BUFFERS"), 0) : 0;
|
|
|
|
Output.bSucceeded = false;
|
|
|
|
std::string MetalSource;
|
|
FString MetalErrors;
|
|
|
|
bool const bZeroInitialise = Input.Environment.CompilerFlags.Contains(CFLAG_ZeroInitialise);
|
|
bool const bBoundsChecks = Input.Environment.CompilerFlags.Contains(CFLAG_BoundsChecking);
|
|
|
|
bool bAllowFastIntrinsics = true;
|
|
|
|
// WPO requires that we make all multiply/sincos instructions invariant :(
|
|
bool bForceInvariance = Input.Environment.GetCompileArgument(TEXT("USES_WORLD_POSITION_OFFSET"), false);
|
|
|
|
FMetalShaderOutputMetaData OutputData;
|
|
|
|
uint32 CRCLen = 0;
|
|
uint32 CRC = 0;
|
|
uint32 SourceLen = 0;
|
|
|
|
struct FMetalResourceTableEntry : FUniformResourceEntry
|
|
{
|
|
FString Name;
|
|
uint32 Size;
|
|
uint32 SetIndex;
|
|
bool bUsed;
|
|
};
|
|
TMap<FString, TArray<FMetalResourceTableEntry>> IABs;
|
|
|
|
FString PreprocessedShader = InPreprocessedShader;
|
|
|
|
uint32 NumCBVs = 0;
|
|
const char* ReflectionJSON = nullptr;
|
|
bool bUsesDiscard = false;
|
|
uint32 OutputSizeVS = 0;
|
|
uint32 MaxInputPrimitivesPerMeshThreadgroupGS = 0;
|
|
|
|
FMetalShaderBytecode MetalBytecode;
|
|
|
|
#if PLATFORM_MAC || PLATFORM_WINDOWS
|
|
{
|
|
std::string EntryPointNameAnsi(TCHAR_TO_UTF8(*Input.EntryPointName));
|
|
|
|
CrossCompiler::FShaderConductorContext CompilerContext;
|
|
|
|
// Initialize compilation options for ShaderConductor
|
|
CrossCompiler::FShaderConductorOptions Options;
|
|
|
|
Options.TargetEnvironment = CrossCompiler::FShaderConductorOptions::ETargetEnvironment::Vulkan_1_2;
|
|
|
|
// Enable HLSL 2021 if specified
|
|
if (Input.Environment.CompilerFlags.Contains(CFLAG_HLSL2021))
|
|
{
|
|
Options.HlslVersion = 2021;
|
|
}
|
|
|
|
Options.bEnable16bitTypes = true;
|
|
|
|
FMetalShaderParameterParserPlatformConfiguration PlatformConfiguration;
|
|
FShaderParameterParser ShaderParameterParser(PlatformConfiguration);
|
|
if (!ShaderParameterParser.ParseAndModify(Input, Output.Errors, PreprocessedShader))
|
|
{
|
|
// The FShaderParameterParser will add any relevant errors.
|
|
return;
|
|
}
|
|
|
|
TArray<FString> ExtraArgs;
|
|
|
|
if (Input.Environment.CompilerFlags.Contains(CFLAG_GenerateSymbols))
|
|
{
|
|
ExtraArgs.Add(TEXT("-Zi"));
|
|
ExtraArgs.Add(TEXT("-Qembed_debug"));
|
|
ExtraArgs.Add(TEXT("--ignore-line-directives"));
|
|
}
|
|
|
|
if (Input.Environment.CompilerFlags.Contains(CFLAG_Debug) || Input.Environment.CompilerFlags.Contains(CFLAG_SkipOptimizationsDXC))
|
|
{
|
|
// Currently cannot enable -Od because we have unbound parameters
|
|
ExtraArgs.Add(TEXT("-O1"));
|
|
}
|
|
else if (Input.Environment.CompilerFlags.Contains(CFLAG_StandardOptimization))
|
|
{
|
|
ExtraArgs.Add(TEXT("-O1"));
|
|
}
|
|
else
|
|
{
|
|
ExtraArgs.Add(TEXT("-O3"));
|
|
}
|
|
|
|
// Load shader source into compiler context
|
|
CompilerContext.LoadSource(PreprocessedShader, Input.VirtualSourceFilePath, Input.EntryPointName, Frequency, nullptr, &ExtraArgs);
|
|
|
|
// Convert shader source to ANSI string
|
|
FAnsiString SourceData = FAnsiString::ConstructFromPtrSize(CompilerContext.GetSourceString(), CompilerContext.GetSourceLength());
|
|
|
|
// Replace special case texture "gl_LastFragData" by native subpass fetch operation
|
|
static const uint32 MaxMetalSubpasses = 8;
|
|
uint32 SubpassInputsDim[MaxMetalSubpasses];
|
|
|
|
bool bSourceDataWasModified = PatchSpecialTextureInHlslSource(SourceData, SubpassInputsDim, MaxMetalSubpasses);
|
|
|
|
// If source data was modified, reload it into the compiler context
|
|
if (bSourceDataWasModified)
|
|
{
|
|
CompilerContext.LoadSource(SourceData, Input.VirtualSourceFilePath, Input.EntryPointName, Frequency, nullptr, &ExtraArgs);
|
|
}
|
|
|
|
if (bDumpDebugInfo)
|
|
{
|
|
DumpDebugShaderText(Input, &SourceData[0], SourceData.Len(), TEXT("rewritten.hlsl"));
|
|
}
|
|
|
|
CrossCompiler::FHlslccHeaderWriter CCHeaderWriter;
|
|
|
|
FString ALNString;
|
|
FString RTString;
|
|
uint32 IABOffsetIndex = 0;
|
|
uint64 BufferIndices = 0xffffffffffffffff;
|
|
// Make sure int64 atomics and dynamic heap indexing are available.
|
|
Options.ShaderModel = {6, 6};
|
|
|
|
// Compile HLSL source to DXIL binary
|
|
TArray<uint32> DxilData;
|
|
|
|
if (!CompilerContext.CompileHlslToDxil(Options, DxilData))
|
|
{
|
|
UE_LOG(LogShaders, Error, TEXT("Failed to produce DXIL bytecode for '%s' '%s'!"), *Input.EntryPointName, *Input.DumpDebugInfoPath);
|
|
CompilerContext.FlushErrors(Output.Errors);
|
|
|
|
for (const FShaderCompilerError& Error : Output.Errors)
|
|
{
|
|
UE_LOG(LogShaders, Error, TEXT("%s"), *Error.GetErrorStringWithLineMarker());
|
|
}
|
|
|
|
Output.bSucceeded = false;
|
|
|
|
return;
|
|
}
|
|
|
|
// Return code reflection if requested for shader analysis
|
|
if (Input.Environment.CompilerFlags.Contains(CFLAG_OutputAnalysisArtifacts))
|
|
{
|
|
FGenericShaderStat ShaderCodeReflection;
|
|
if (CrossCompiler::FShaderConductorContext::Disassemble(CrossCompiler::EShaderConductorIR::Dxil, DxilData.GetData(), DxilData.Num()*sizeof(uint32), ShaderCodeReflection))
|
|
{
|
|
Output.ShaderStatistics.Add(MoveTemp(ShaderCodeReflection));
|
|
}
|
|
}
|
|
|
|
if (bDumpDebugInfo)
|
|
{
|
|
DumpDebugShaderBinary(Input, DxilData.GetData(), DxilData.Num() * sizeof(uint32), TEXT("dxil"));
|
|
}
|
|
|
|
ANSICHAR MainCRC[25];
|
|
CRCLen = DxilData.Num() * sizeof( uint32_t );
|
|
CRC = FCrc::MemCrc_DEPRECATED(DxilData.GetData(), CRCLen);
|
|
FCStringAnsi::Snprintf(MainCRC, 25, "Main_%0.8x_%0.8x", CRCLen, CRC);
|
|
|
|
// Build shader metadata and root signature parameters
|
|
bool bSuccessfulReflection = ReflectDXILAndBuildDescriptorRanges(DxilData, Input, Output, CCHeaderWriter, OutputData, NumCBVs, Output.NumInstructions);
|
|
check(bSuccessfulReflection);
|
|
|
|
// Build root parameters
|
|
const IRShaderVisibility ShaderVisibility = ShaderFrequencyToVisibility(Frequency);
|
|
|
|
// Bind CBVs as root parameters (this way, we avoid creating a descriptor table and an extra indirection at runtime).
|
|
TArray<IRRootParameter1> RootParams;
|
|
for (uint32 i = 0; i < NumCBVs; i++)
|
|
{
|
|
IRRootParameter1 RootParam;
|
|
RootParam.ParameterType = IRRootParameterTypeCBV;
|
|
RootParam.ShaderVisibility = ShaderVisibility;
|
|
RootParam.Descriptor.ShaderRegister = i;
|
|
RootParam.Descriptor.RegisterSpace = 0;
|
|
RootParam.Descriptor.Flags = IRRootDescriptorFlagDataStaticWhileSetAtExecute;
|
|
|
|
RootParams.Add(RootParam);
|
|
}
|
|
|
|
// Create the root signature for air generation.
|
|
IRVersionedRootSignatureDescriptor RootSignatureDesc;
|
|
RootSignatureDesc.version = IRRootSignatureVersion_1_1;
|
|
RootSignatureDesc.desc_1_1.Flags = IRRootSignatureFlagNone;
|
|
RootSignatureDesc.desc_1_1.pStaticSamplers = nullptr;
|
|
RootSignatureDesc.desc_1_1.NumStaticSamplers = 0;
|
|
RootSignatureDesc.desc_1_1.pParameters = RootParams.GetData();
|
|
RootSignatureDesc.desc_1_1.NumParameters = RootParams.Num();
|
|
|
|
IRError* RootSignatureCreationError = nullptr;
|
|
IRRootSignature* RootSignature = IRRootSignatureCreateFromDescriptor(&RootSignatureDesc, &RootSignatureCreationError);
|
|
if (RootSignature == nullptr || RootSignatureCreationError != nullptr)
|
|
{
|
|
FShaderCompilerError Error(FString::Printf(TEXT("Error: MetalShaderConverter failed to create a root signature for '%s' (%s)!"), *Input.EntryPointName, ANSI_TO_TCHAR((const char *)IRErrorGetPayload(RootSignatureCreationError))));
|
|
Output.Errors.Add(Error);
|
|
Output.bSucceeded = false;
|
|
|
|
return;
|
|
}
|
|
|
|
// Convert DXIL to air
|
|
IRObject* DXILBytecode = IRObjectCreateFromDXIL(reinterpret_cast<const uint8_t*>(DxilData.GetData()), DxilData.Num() * sizeof(uint32), IRBytecodeOwnershipCopy);
|
|
|
|
IRCompiler* CompilerInstance = IRCompilerCreate();
|
|
IRCompilerSetEntryPointName(CompilerInstance, MainCRC);
|
|
IRCompilerSetGlobalRootSignature(CompilerInstance, RootSignature);
|
|
IRCompilerSetStageInGenerationMode(CompilerInstance, IRStageInCodeGenerationModeUseSeparateStageInFunction);
|
|
IRCompilerSetCompatibilityFlags(CompilerInstance, (IRCompatibilityFlags)(IRCompatibilityFlagBoundsCheck | IRCompatibilityFlagPositionInvariance | IRCompatibilityFlagSampleNanToZero | IRCompatibilityFlagTexWriteRoundingRTZ));
|
|
IRCompilerSetMinimumGPUFamily(CompilerInstance, IRGPUFamilyMetal3);
|
|
IRCompilerSetMinimumDeploymentTarget(CompilerInstance, IROperatingSystem_macOS, "15.0.0");
|
|
|
|
#if PLATFORM_SUPPORTS_GEOMETRY_SHADERS
|
|
IRCompilerEnableGeometryAndTessellationEmulation(CompilerInstance, Input.Environment.CompilerFlags.Contains(CFLAG_VertexToGeometryShader));
|
|
#endif
|
|
|
|
// TODO: Is there a flag we could check to avoid this string lookup?
|
|
bool bUsesDualSourceBlending = (SourceData.Find("vk::location") != INDEX_NONE);
|
|
if (bUsesDualSourceBlending)
|
|
{
|
|
IRCompilerSetDualSourceBlendingConfiguration(CompilerInstance, IRDualSourceBlendingConfigurationForceEnabled);
|
|
}
|
|
|
|
// Uncomment to enable IR validation.
|
|
//IRCompilerSetValidationFlags(CompilerInstance, IRCompilerValidationFlagAll);
|
|
|
|
IRError* CompileError = nullptr;
|
|
IRObject* AirBytecode = IRCompilerAllocCompileAndLink(CompilerInstance, nullptr, DXILBytecode, &CompileError);
|
|
if (!AirBytecode || CompileError != nullptr)
|
|
{
|
|
FShaderCompilerError Error(FString::Printf(TEXT("Error: MetalShaderConverter failed to produce air bytecode for '%s' (%s)!"), *Input.EntryPointName, ANSI_TO_TCHAR((const char *)IRErrorGetPayload(CompileError))));
|
|
Output.Errors.Add(Error);
|
|
Output.bSucceeded = false;
|
|
|
|
return;
|
|
}
|
|
const IRShaderStage ShaderStage = ShaderFrequencyToStage(Frequency);
|
|
|
|
// Reflect air
|
|
bool bNeedsAirReflection = (ShaderStage == IRShaderStageVertex || ShaderStage == IRShaderStageFragment || ShaderStage == IRShaderStageCompute
|
|
#if PLATFORM_SUPPORTS_GEOMETRY_SHADERS
|
|
|| ShaderStage == IRShaderStageGeometry
|
|
#endif
|
|
);
|
|
|
|
if (bNeedsAirReflection)
|
|
{
|
|
IRShaderReflection* AirReflection = IRShaderReflectionCreate();
|
|
IRObjectGetReflection(AirBytecode, ShaderStage, AirReflection);
|
|
|
|
if(bDumpDebugInfo)
|
|
{
|
|
ReflectionJSON = IRShaderReflectionCopyJSONString(AirReflection);
|
|
FString ReflectionString = ANSI_TO_TCHAR(ReflectionJSON);
|
|
DumpDebugShaderText(Input, ReflectionString, TEXT("reflection.json"));
|
|
checkSlow(ReflectionJSON);
|
|
}
|
|
|
|
switch (ShaderStage)
|
|
{
|
|
case IRShaderStageVertex:
|
|
{
|
|
// Retrieve VS infos only if GS emulation is used (VS output size is useless otherwise).
|
|
#if PLATFORM_SUPPORTS_GEOMETRY_SHADERS
|
|
IRVersionedVSInfo Info;
|
|
bool bSuccessfulReflectionVS = IRShaderReflectionCopyVertexInfo(AirReflection, IRReflectionVersion_1_0, &Info);
|
|
check(bSuccessfulReflectionVS);
|
|
|
|
OutputSizeVS = Info.info_1_0.vertex_output_size_in_bytes;
|
|
|
|
IRShaderReflectionReleaseVertexInfo(&Info);
|
|
#endif
|
|
|
|
if(!ReflectionJSON)
|
|
{
|
|
// Serialize Reflection for vs (required to generate stage_in functions at PSO creation-time)
|
|
ReflectionJSON = IRShaderReflectionCopyJSONString(AirReflection);
|
|
checkSlow(ReflectionJSON);
|
|
}
|
|
break;
|
|
}
|
|
#if PLATFORM_SUPPORTS_GEOMETRY_SHADERS
|
|
case IRShaderStageGeometry:
|
|
{
|
|
IRVersionedGSInfo Info;
|
|
bool bSuccessfulReflectionGS = IRShaderReflectionCopyGeometryInfo(AirReflection, IRReflectionVersion_1_0, &Info);
|
|
check(bSuccessfulReflectionGS);
|
|
|
|
MaxInputPrimitivesPerMeshThreadgroupGS = Info.info_1_0.max_input_primitives_per_mesh_threadgroup;
|
|
|
|
IRShaderReflectionReleaseGeometryInfo(&Info);
|
|
}
|
|
break;
|
|
#endif
|
|
case IRShaderStageFragment:
|
|
{
|
|
IRVersionedFSInfo Info;
|
|
bool bSuccessfulReflectionPS = IRShaderReflectionCopyFragmentInfo(AirReflection, IRReflectionVersion_1_0, &Info);
|
|
check(bSuccessfulReflectionPS);
|
|
|
|
bUsesDiscard = Info.info_1_0.discards;
|
|
|
|
IRShaderReflectionReleaseFragmentInfo(&Info);
|
|
}
|
|
break;
|
|
|
|
case IRShaderStageCompute:
|
|
{
|
|
IRVersionedCSInfo Info;
|
|
bool bSuccessfulReflectionCS = IRShaderReflectionCopyComputeInfo(AirReflection, IRReflectionVersion_1_0, &Info);
|
|
check(bSuccessfulReflectionCS);
|
|
|
|
CCHeaderWriter.WriteNumThreads(Info.info_1_0.tg_size[0], Info.info_1_0.tg_size[1], Info.info_1_0.tg_size[2]);
|
|
|
|
IRShaderReflectionReleaseComputeInfo(&Info);
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
IRShaderReflectionDestroy(AirReflection);
|
|
}
|
|
|
|
// Retrieve the generated .metallib
|
|
IRMetalLibBinary* GeneratedMetalLib = IRMetalLibBinaryCreate();
|
|
if (!IRObjectGetMetalLibBinary(AirBytecode, ShaderStage, GeneratedMetalLib))
|
|
{
|
|
FShaderCompilerError Error(FString::Printf(TEXT("Error: MetalShaderConverter failed to produce a metallib for '%s'!"), *Input.EntryPointName));
|
|
Output.Errors.Add(Error);
|
|
Output.bSucceeded = false;
|
|
|
|
return;
|
|
}
|
|
|
|
size_t MetalLibSize = IRMetalLibGetBytecodeSize(GeneratedMetalLib);
|
|
|
|
MetalBytecode.OutputFile.Reserve(MetalLibSize);
|
|
MetalBytecode.OutputFile.SetNum(MetalLibSize);
|
|
|
|
size_t OutMetalLibSize = IRMetalLibGetBytecode(GeneratedMetalLib, reinterpret_cast<uint8_t*>(MetalBytecode.OutputFile.GetData()));
|
|
//checkSlow(OutMetalLibSize != MetalLibSize);
|
|
|
|
MetalBytecode.ObjectFile.SetNum(MetalLibSize);
|
|
|
|
// Copy the AIR (needed for serialization below)
|
|
MetalBytecode.ObjectFile = MetalBytecode.OutputFile;
|
|
IRRootSignatureDestroy(RootSignature);
|
|
|
|
IRMetalLibBinaryDestroy(GeneratedMetalLib);
|
|
IRObjectDestroy(AirBytecode);
|
|
IRObjectDestroy(DXILBytecode);
|
|
IRCompilerDestroy(CompilerInstance);
|
|
|
|
CCHeaderWriter.WriteSourceInfo(*Input.VirtualSourceFilePath, *Input.EntryPointName);
|
|
CCHeaderWriter.WriteCompilerInfo();
|
|
|
|
FString MetaData = CCHeaderWriter.ToString();
|
|
MetaData += RTString;
|
|
MetaData += TEXT("\n\n");
|
|
if (ALNString.Len())
|
|
{
|
|
MetaData += TEXT("// Attributes: ");
|
|
MetaData += ALNString;
|
|
MetaData += TEXT("\n\n");
|
|
}
|
|
|
|
MetalSource = TCHAR_TO_UTF8(*MetaData);
|
|
|
|
if (bDumpDebugInfo)
|
|
{
|
|
DumpDebugShaderBinary(Input, MetalBytecode.ObjectFile.GetData(), MetalBytecode.ObjectFile.Num() * sizeof(uint8), TEXT("air"));
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// Attribute [[clang::optnone]] causes performance hit with WPO on M1 Macs => replace with empty space
|
|
const std::string ClangOptNoneString = "[[clang::optnone]]";
|
|
for (size_t Begin = 0, End = 0; (Begin = MetalSource.find(ClangOptNoneString, End)) != std::string::npos; End = Begin)
|
|
{
|
|
MetalSource.replace(Begin, ClangOptNoneString.length(), " ");
|
|
}
|
|
|
|
if (bDumpDebugInfo && !MetalSource.empty())
|
|
{
|
|
DumpDebugShaderText(Input, &MetalSource[0], MetalSource.size(), TEXT("metal"));
|
|
}
|
|
|
|
Output.Target = Input.Target;
|
|
BuildMetalShaderOutput(Output, Input, MetalSource.c_str(), MetalSource.length(), CRCLen, CRC, VersionEnum, *Standard, *MinOSVersion, Output.Errors, OutputData.TypedBuffers, OutputData.InvariantBuffers, OutputData.TypedUAVs, OutputData.ConstantBuffers, bAllowFastIntrinsics
|
|
, NumCBVs, OutputSizeVS, MaxInputPrimitivesPerMeshThreadgroupGS, bUsesDiscard, ReflectionJSON, MetalBytecode
|
|
);
|
|
}
|
|
|
|
#endif // PLATFORM_MAC || PLATFORM_WINDOWS
|
|
#endif // UE_METAL_USE_METAL_SHADER_CONVERTER
|