Files
UnrealEngine/Engine/Source/Runtime/D3D12RHI/Private/D3D12RayTracing.cpp
2025-05-18 13:04:45 +08:00

6022 lines
234 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#include "D3D12RayTracing.h"
#if D3D12_RHI_RAYTRACING
#include "D3D12Resources.h"
#include "D3D12Util.h"
#include "Containers/DynamicRHIResourceArray.h"
#include "Experimental/Containers/SherwoodHashTable.h"
#include "BuiltInRayTracingShaders.h"
#include "RayTracingValidationShaders.h"
#include "Hash/xxhash.h"
#include "HAL/CriticalSection.h"
#include "HAL/IConsoleManager.h"
#include "HAL/FileManagerGeneric.h"
#include "Misc/ScopeLock.h"
#include "Async/ParallelFor.h"
#include "Misc/BufferedOutputDevice.h"
#include "String/LexFromString.h"
#include "GlobalRenderResources.h"
#include "D3D12RayTracingDebug.h"
#include "D3D12ExplicitDescriptorCache.h"
#include "D3D12ResourceCollection.h"
#include "RHIShaderBindingLayout.h"
#include "RHIUniformBufferUtilities.h"
#include "RHIResourceUtils.h"
#include "D3D12TextureReference.h"
extern int32 GD3D12ExplicitViewDescriptorHeapSize;
extern int32 GD3D12ExplicitViewDescriptorHeapOverflowReported;
// Debug override for the acceleration structure build mode, exposed as r.D3D12.RayTracing.DebugForceBuildMode.
// Registered with ECVF_ReadOnly (the help text states it is not runtime-tweakable).
static int32 GRayTracingDebugForceBuildMode = 0;
static FAutoConsoleVariableRef CVarRayTracingDebugForceFastTrace(
TEXT("r.D3D12.RayTracing.DebugForceBuildMode"),
GRayTracingDebugForceBuildMode,
TEXT("Forces specific acceleration structure build mode (not runtime-tweakable).\n")
TEXT("0: Use build mode requested by high-level code (Default)\n")
TEXT("1: Force fast build mode\n")
TEXT("2: Force fast trace mode\n"),
ECVF_ReadOnly
);
// Toggles caching and re-use of SBT hit group records, exposed as r.D3D12.RayTracing.CacheShaderRecords (default 1).
static int32 GRayTracingCacheShaderRecords = 1;
static FAutoConsoleVariableRef CVarRayTracingShaderRecordCache(
TEXT("r.D3D12.RayTracing.CacheShaderRecords"),
GRayTracingCacheShaderRecords,
TEXT("Automatically cache and re-use SBT hit group records. This significantly improves CPU performance in large scenes with many identical mesh instances. (default = 1)\n")
TEXT("This mode assumes that contents of uniform buffers does not change during ray tracing resource binding.")
);
// Whether static acceleration structures may be compacted automatically. Registered with ECVF_ReadOnly.
static int32 GD3D12RayTracingAllowCompaction = 1;
static FAutoConsoleVariableRef CVarD3D12RayTracingAllowCompaction(
	TEXT("r.D3D12.RayTracing.AllowCompaction"),
	GD3D12RayTracingAllowCompaction,
	TEXT("Whether to automatically perform compaction for static acceleration structures to save GPU memory. (default = 1)\n"),
	ECVF_ReadOnly
);

// Upper bound on compaction requests batched per update. Registered with ECVF_ReadOnly.
// FD3D12RayTracingCompactionRequestHandler sizes its post-build info buffer from this value at construction.
static int32 GD3D12RayTracingMaxBatchedCompaction = 64;
static FAutoConsoleVariableRef CVarD3D12RayTracingMaxBatchedCompaction(
	TEXT("r.D3D12.RayTracing.MaxBatchedCompaction"),
	GD3D12RayTracingMaxBatchedCompaction,
	// Help text grammar fixed: was "Maximum of amount of".
	TEXT("Maximum amount of compaction requests and rebuilds per frame. (default = 64)\n"),
	ECVF_ReadOnly
);

// Primitive-count threshold below which geometry skips compaction (per the help text).
static int32 GD3D12RayTracingCompactionMinPrimitiveCount = 128;
static FAutoConsoleVariableRef CVarD3D12RayTracingCompactionMinPrimitiveCount(
	TEXT("r.D3D12.RayTracing.Compaction.MinPrimitiveCount"),
	GD3D12RayTracingCompactionMinPrimitiveCount,
	TEXT("Sets the minimum primitive count threshold below which geometry skips the compaction. (default = 128)\n")
);
// Whether a specialized RTPSO is created per ray generation shader. Registered with ECVF_ReadOnly.
static int32 GRayTracingSpecializeStateObjects = 0;
static FAutoConsoleVariableRef CVarRayTracingSpecializeStateObjects(
	TEXT("r.D3D12.RayTracing.SpecializeStateObjects"),
	GRayTracingSpecializeStateObjects,
	TEXT("Whether to create specialized unique ray tracing pipeline state objects for each ray generation shader. (default = 0)\n")
	// Help text typo fixed: was "more more efficient".
	TEXT("This option can produce more efficient PSOs for the GPU at the cost of longer creation times and more memory. Requires DXR 1.1.\n"),
	ECVF_ReadOnly
);

// Whether specialized RTPSOs are used when they exist (not read-only, unlike the creation switch above).
static int32 GRayTracingAllowSpecializedStateObjects = 1;
static FAutoConsoleVariableRef CVarRayTracingAllowSpecializedStateObjects(
	TEXT("r.D3D12.RayTracing.AllowSpecializedStateObjects"),
	GRayTracingAllowSpecializedStateObjects,
	TEXT("Whether to use specialized RTPSOs if they have been created. ")
	// Help text typo fixed: was "testingand".
	TEXT("This is intended for performance testing and has no effect if r.D3D12.RayTracing.SpecializeStateObjects is 0. (default = 1)\n")
);
// Opt-in switch for GPU-side validation of ray tracing structures (default 0).
// Read by ShouldRunRayTracingGPUValidation(), which additionally requires wave operation support.
static int32 GD3D12RayTracingGPUValidation = 0;
static FAutoConsoleVariableRef CVarD3D12RayTracingGPUValidation(
TEXT("r.D3D12.RayTracing.GPUValidation"),
GD3D12RayTracingGPUValidation,
TEXT("Whether to perform validation of ray tracing geometry and other structures on the GPU. Requires Shader Model 6. (default = 0)")
);
// This is required to avoid redundant code static analysis warnings.
// If the static_assert fires, the assumptions it is predicated on have been
// violated and the code should be revisited.
//
// FOREACH_GPU(Condition, Function):
//  - WITH_MGPU: expands to a for loop that declares uint32 GPUIndex starting at 0 and runs Function while Condition holds.
//  - otherwise: expands to a single scoped block with a constexpr GPUIndex of 0; Condition is not evaluated.
#if WITH_MGPU
#define FOREACH_GPU(Condition, Function) for (uint32 GPUIndex = 0; Condition; ++GPUIndex) Function
#else
static_assert(MAX_NUM_GPUS == 1 && GNumExplicitGPUsForRendering == 1);
#define FOREACH_GPU(Condition, Function) { constexpr uint32 GPUIndex = 0; Function }
#endif
// Ray tracing stat counters
// All stats below are declared in the STATGROUP_D3D12RayTracing group ("D3D12RHI: Ray Tracing").
DECLARE_STATS_GROUP(TEXT("D3D12RHI: Ray Tracing"), STATGROUP_D3D12RayTracing, STATCAT_Advanced);
// Accumulator stats (running totals).
DECLARE_DWORD_ACCUMULATOR_STAT(TEXT("Created pipelines (total)"), STAT_D3D12RayTracingCreatedPipelines, STATGROUP_D3D12RayTracing);
DECLARE_DWORD_ACCUMULATOR_STAT(TEXT("Compiled shaders (total)"), STAT_D3D12RayTracingCompiledShaders, STATGROUP_D3D12RayTracing);
DECLARE_DWORD_ACCUMULATOR_STAT(TEXT("Allocated bottom level acceleration structures"), STAT_D3D12RayTracingAllocatedBLAS, STATGROUP_D3D12RayTracing);
DECLARE_DWORD_ACCUMULATOR_STAT(TEXT("Allocated top level acceleration structures"), STAT_D3D12RayTracingAllocatedTLAS, STATGROUP_D3D12RayTracing);
DECLARE_DWORD_ACCUMULATOR_STAT(TEXT("Triangles in all BL acceleration structures"), STAT_D3D12RayTracingTrianglesBLAS, STATGROUP_D3D12RayTracing);
// Per-frame counters.
DECLARE_DWORD_COUNTER_STAT(TEXT("Built BL AS (per frame)"), STAT_D3D12RayTracingBuiltBLAS, STATGROUP_D3D12RayTracing);
DECLARE_DWORD_COUNTER_STAT(TEXT("Updated BL AS (per frame)"), STAT_D3D12RayTracingUpdatedBLAS, STATGROUP_D3D12RayTracing);
DECLARE_DWORD_COUNTER_STAT(TEXT("Built TL AS (per frame)"), STAT_D3D12RayTracingBuiltTLAS, STATGROUP_D3D12RayTracing);
DECLARE_DWORD_COUNTER_STAT(TEXT("Updated TL AS (per frame)"), STAT_D3D12RayTracingUpdatedTLAS, STATGROUP_D3D12RayTracing);
DECLARE_DWORD_COUNTER_STAT(TEXT("UniformBuffer Record Updates (per frame)"), STAT_D3D12RayTracingUBRecordUpdate, STATGROUP_D3D12RayTracing);
// Memory stats.
DECLARE_MEMORY_STAT(TEXT("Total BL AS Memory"), STAT_D3D12RayTracingBLASMemory, STATGROUP_D3D12RayTracing);
DECLARE_MEMORY_STAT(TEXT("Static BL AS Memory"), STAT_D3D12RayTracingStaticBLASMemory, STATGROUP_D3D12RayTracing);
DECLARE_MEMORY_STAT(TEXT("Dynamic BL AS Memory"), STAT_D3D12RayTracingDynamicBLASMemory, STATGROUP_D3D12RayTracing);
DECLARE_MEMORY_STAT(TEXT("TL AS Memory"), STAT_D3D12RayTracingTLASMemory, STATGROUP_D3D12RayTracing);
DECLARE_MEMORY_STAT(TEXT("Total Used Video Memory"), STAT_D3D12RayTracingUsedVideoMemory, STATGROUP_D3D12RayTracing);
// Cycle (timing) stats and the shader binding table allocation counter.
DECLARE_CYCLE_STAT(TEXT("RTPSO Compile Shader"), STAT_RTPSO_CompileShader, STATGROUP_D3D12RayTracing);
DECLARE_CYCLE_STAT(TEXT("RTPSO Create Pipeline"), STAT_RTPSO_CreatePipeline, STATGROUP_D3D12RayTracing);
DECLARE_DWORD_ACCUMULATOR_STAT(TEXT("Allocated shader binding tables"), STAT_D3D12RayTracingAllocatedSBT, STATGROUP_D3D12RayTracing);
DECLARE_CYCLE_STAT(TEXT("SetBindingsOnShaderBindingTable"), STAT_D3D12SetBindingsOnShaderBindingTable, STATGROUP_D3D12RayTracing);
DECLARE_CYCLE_STAT(TEXT("CreateShaderTable"), STAT_D3D12CreateShaderTable, STATGROUP_D3D12RayTracing);
DECLARE_CYCLE_STAT(TEXT("BuildTopLevel"), STAT_D3D12BuildTLAS, STATGROUP_D3D12RayTracing);
DECLARE_CYCLE_STAT(TEXT("BuildBottomLevel"), STAT_D3D12BuildBLAS, STATGROUP_D3D12RayTracing);
DECLARE_CYCLE_STAT(TEXT("DispatchRays"), STAT_D3D12DispatchRays, STATGROUP_D3D12RayTracing);
static ERayTracingAccelerationStructureFlags GetRayTracingAccelerationStructureBuildFlags(const FRayTracingGeometryInitializer& Initializer);
#if UE_BUILD_SHIPPING
// Shipping builds do not track geometries: both hooks are intentionally empty.
inline void RegisterD3D12RayTracingGeometry(FD3D12RayTracingGeometry* Geometry)
{
}
inline void UnregisterD3D12RayTracingGeometry(FD3D12RayTracingGeometry* Geometry)
{
}
#else
struct FD3D12RayTracingGeometryTracker
{
TSet<FD3D12RayTracingGeometry*> Geometries;
uint64 TotalBLASSize = 0;
uint64 MaxTotalBLASSize = 0;
FCriticalSection CS;
uint64 GetGeometrySize(FD3D12RayTracingGeometry& Geometry)
{
if (Geometry.AccelerationStructureCompactedSize != 0)
{
return Geometry.AccelerationStructureCompactedSize;
}
else
{
return Geometry.SizeInfo.ResultSize;
}
}
void Add(FD3D12RayTracingGeometry* Geometry)
{
uint64 BLASSize = GetGeometrySize(*Geometry);
FScopeLock Lock(&CS);
Geometries.Add(Geometry);
TotalBLASSize += BLASSize;
MaxTotalBLASSize = FMath::Max(MaxTotalBLASSize, TotalBLASSize);
}
void Remove(FD3D12RayTracingGeometry* Geometry)
{
uint64 BLASSize = GetGeometrySize(*Geometry);
FScopeLock Lock(&CS);
Geometries.Remove(Geometry);
TotalBLASSize -= BLASSize;
}
};
// Accessor for the single tracker instance, held as a function-local static.
static FD3D12RayTracingGeometryTracker& GetD3D12RayTracingGeometryTracker()
{
	static FD3D12RayTracingGeometryTracker GlobalTracker;
	return GlobalTracker;
}
// Selects what the D3D12.DumpRayTracingGeometries* console commands request from DumpRayTracingGeometries().
enum class EDumpRayTracingGeometryMode
{
// Used by the console command together with a limited entry count (default 50).
Top,
// Used by the console commands together with an entry count of -1 (show everything).
All,
};
// Reports all tracked ray tracing geometries, largest first, either as log lines written to BufferedOutput
// or as rows of a CSV file created in the profiling directory.
//
//   Mode             - not consulted by this function; the entry limit is driven by NumEntriesToShow.
//   NumEntriesToShow - maximum number of entries to report; a negative or too-large value means "all".
//   NameFilter       - when non-empty, only geometries whose debug name contains it (case-insensitive) are reported.
//   bCSV             - true to write rows to a CSV file instead of logging them.
//   BufferedOutput   - receives the log output, including the error emitted if the CSV file cannot be created.
//
// The tracker's critical section is held for the duration of the dump.
static void DumpRayTracingGeometries(EDumpRayTracingGeometryMode Mode, int32 NumEntriesToShow, const FString& NameFilter, bool bCSV, FBufferedOutputDevice& BufferedOutput)
{
	FD3D12RayTracingGeometryTracker& Tracker = GetD3D12RayTracingGeometryTracker();
	FScopeLock Lock(&Tracker.CS);

	// Sort by descending size, using the tracker's own size query so the dump agrees with its totals.
	TArray<FD3D12RayTracingGeometry*> Geometries = Tracker.Geometries.Array();
	Geometries.Sort([&Tracker](FD3D12RayTracingGeometry& A, FD3D12RayTracingGeometry& B)
	{
		return Tracker.GetGeometrySize(A) > Tracker.GetGeometrySize(B);
	});

	FName CategoryName(TEXT("D3D12RayTracing"));
	uint64 TotalSizeBytes = 0;
	uint64 TopSizeBytes = 0;

	BufferedOutput.CategorizedLogf(CategoryName, ELogVerbosity::Log, TEXT("Tracked FD3D12RayTracingGeometry objects"));

	if (NumEntriesToShow < 0 || NumEntriesToShow > Geometries.Num())
	{
		NumEntriesToShow = Geometries.Num();
	}
	if (NumEntriesToShow != Geometries.Num())
	{
		BufferedOutput.CategorizedLogf(CategoryName, ELogVerbosity::Log, TEXT("Showing %d out of %d"), NumEntriesToShow, Geometries.Num());
	}

	// An empty filter accepts everything; otherwise the debug name must contain the filter text.
	auto ShouldShow = [&NameFilter](FD3D12RayTracingGeometry* Entry)
	{
		if (NameFilter.IsEmpty())
		{
			return true;
		}
		const FString DebugName = Entry->DebugName.ToString();
		return DebugName.Find(NameFilter, ESearchCase::IgnoreCase) != INDEX_NONE;
	};

	FArchive* CSVFile = nullptr;
	if (bCSV)
	{
		const FString Filename = FString::Printf(TEXT("%sd3d12DumpRayTracingGeometries-%s.csv"), *FPaths::ProfilingDir(), *FDateTime::Now().ToString());
		CSVFile = IFileManager::Get().CreateFileWriter(*Filename, FILEWRITE_AllowRead);
		if (CSVFile == nullptr)
		{
			// The writer could not be created (e.g. unwritable directory): report it instead of dereferencing null.
			BufferedOutput.CategorizedLogf(CategoryName, ELogVerbosity::Error, TEXT("Failed to create CSV file '%s'"), *Filename);
			return;
		}
		const TCHAR* Header = TEXT("Name,Size (MBs),Prims,Segments,Compaction,Update,MarkedForDelete\n");
		CSVFile->Serialize(TCHAR_TO_ANSI(Header), FPlatformString::Strlen(Header));
	}

	int32 ShownEntries = 0;
	for (FD3D12RayTracingGeometry* Geometry : Geometries)
	{
		const uint64 SizeBytes = Tracker.GetGeometrySize(*Geometry);
		const ERayTracingAccelerationStructureFlags GeometryBuildFlags = GetRayTracingAccelerationStructureBuildFlags(Geometry->Initializer);

		if (ShownEntries < NumEntriesToShow && ShouldShow(Geometry))
		{
			const FString DisplayName = Geometry->DebugName.IsNone() ? FString(TEXT("*UNKNOWN*")) : Geometry->DebugName.ToString();
			const double SizeMB = SizeBytes / double(1 << 20);
			const int32 bAllowsCompaction = (int32)EnumHasAllFlags(GeometryBuildFlags, ERayTracingAccelerationStructureFlags::AllowCompaction);
			const int32 bAllowsUpdate = (int32)EnumHasAllFlags(GeometryBuildFlags, ERayTracingAccelerationStructureFlags::AllowUpdate);

			if (bCSV)
			{
				const FString Row = FString::Printf(TEXT("%s,%.3f,%d,%d,%d,%d,%d\n"),
					*DisplayName,
					SizeMB,
					Geometry->Initializer.TotalPrimitiveCount,
					Geometry->Initializer.Segments.Num(),
					bAllowsCompaction,
					bAllowsUpdate,
					!Geometry->IsValid());
				CSVFile->Serialize(TCHAR_TO_ANSI(*Row), Row.Len());
			}
			else
			{
				BufferedOutput.CategorizedLogf(CategoryName, ELogVerbosity::Log, TEXT("Name: %s - Size: %.3f MB - Prims: %d - Segments: %d - Compaction: %d - Update: %d"),
					*DisplayName,
					SizeMB,
					Geometry->Initializer.TotalPrimitiveCount,
					Geometry->Initializer.Segments.Num(),
					bAllowsCompaction,
					bAllowsUpdate);
			}

			TopSizeBytes += SizeBytes;
			++ShownEntries;
		}

		// The total covers every tracked geometry, shown or not.
		TotalSizeBytes += SizeBytes;
	}

	if (bCSV)
	{
		// Deleting the archive releases the file writer.
		delete CSVFile;
		CSVFile = nullptr;
	}
	else
	{
		const double TotalSizeF = double(TotalSizeBytes) / double(1 << 20);
		const double TopSizeF = double(TopSizeBytes) / double(1 << 20);

		if (ShownEntries != Geometries.Num() && ShownEntries)
		{
			BufferedOutput.CategorizedLogf(CategoryName, ELogVerbosity::Log,
				TEXT("Use command `D3D12.DumpRayTracingGeometries all/N [name]` to dump all or N objects. ")
				TEXT("Optionally add 'name' to filter entries, such as 'skm_'."));

			// Avoid printing NaN when every tracked geometry reports a size of zero.
			const double ShownPercent = (TotalSizeF > 0.0) ? (100.0 * TopSizeF / TotalSizeF) : 0.0;
			BufferedOutput.CategorizedLogf(CategoryName, ELogVerbosity::Log, TEXT("Shown %d entries. Size: %.3f MB (%.2f%% of total)"),
				ShownEntries, TopSizeF, ShownPercent);
		}

		BufferedOutput.CategorizedLogf(CategoryName, ELogVerbosity::Log, TEXT("Total size: %.3f MB"), TotalSizeF);
	}
}
// Console command: D3D12.DumpRayTracingGeometries [all|N] [name]
// Logs tracked ray tracing geometries to the output device (never writes a CSV).
static FAutoConsoleCommandWithWorldArgsAndOutputDevice GD3D12DumpRayTracingGeometriesCmd(
	TEXT("D3D12.DumpRayTracingGeometries"),
	TEXT("Dump memory allocations for ray tracing resources."),
	FConsoleCommandWithWorldArgsAndOutputDeviceDelegate::CreateStatic([](const TArray<FString>& Args, UWorld*, FOutputDevice& OutputDevice)
	{
		// Without arguments, the 50 largest objects are listed.
		EDumpRayTracingGeometryMode DumpMode = EDumpRayTracingGeometryMode::Top;
		int32 MaxEntries = 50;
		FString Filter;

		// First argument: "all" or an entry count. Anything else leaves the defaults in place.
		if (Args.Num() > 0)
		{
			const FString& FirstArg = Args[0];
			if (FirstArg == TEXT("all"))
			{
				DumpMode = EDumpRayTracingGeometryMode::All;
				MaxEntries = -1;
			}
			else if (FCString::IsNumeric(*FirstArg))
			{
				DumpMode = EDumpRayTracingGeometryMode::Top;
				LexFromString(MaxEntries, *FirstArg);
			}
		}

		// Second argument: optional name filter.
		if (Args.Num() > 1)
		{
			Filter = Args[1];
		}

		FBufferedOutputDevice Buffered;
		DumpRayTracingGeometries(DumpMode, MaxEntries, Filter, false, Buffered);
		Buffered.RedirectTo(OutputDevice);
	}));
// Console command: D3D12.DumpRayTracingGeometriesToCSV
// Always dumps every tracked geometry, unfiltered, in CSV mode; command arguments are ignored.
static FAutoConsoleCommandWithWorldArgsAndOutputDevice GD3D12DumpRayTracingGeometriesToCSVCmd(
	TEXT("D3D12.DumpRayTracingGeometriesToCSV"),
	TEXT("Dump all memory allocations for ray tracing resources to a CSV file on disc."),
	FConsoleCommandWithWorldArgsAndOutputDeviceDelegate::CreateStatic([](const TArray<FString>& Args, UWorld*, FOutputDevice& OutputDevice)
	{
		const FString NoFilter;
		FBufferedOutputDevice Buffered;

		// -1 entries means "show all"; the trailing 'true' selects CSV output.
		DumpRayTracingGeometries(EDumpRayTracingGeometryMode::All, -1, NoFilter, true, Buffered);
		Buffered.RedirectTo(OutputDevice);
	}));
// Non-shipping builds: adds the geometry to the global tracker.
inline void RegisterD3D12RayTracingGeometry(FD3D12RayTracingGeometry* Geometry)
{
	FD3D12RayTracingGeometryTracker& Tracker = GetD3D12RayTracingGeometryTracker();
	Tracker.Add(Geometry);
}
// Non-shipping builds: removes the geometry from the global tracker.
inline void UnregisterD3D12RayTracingGeometry(FD3D12RayTracingGeometry* Geometry)
{
	FD3D12RayTracingGeometryTracker& Tracker = GetD3D12RayTracingGeometryTracker();
	Tracker.Remove(Geometry);
}
#endif // UE_BUILD_SHIPPING
// The null shader identifier: initialized with four zero values.
const FD3D12ShaderIdentifier FD3D12ShaderIdentifier::Null = { 0, 0, 0, 0 };
// FD3D12ShaderIdentifier must be exactly as large as the identifier size D3D12 defines.
static_assert(sizeof(FD3D12ShaderIdentifier) == D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES, "Unexpected shader identifier size");
// True when GPU validation is requested via r.D3D12.RayTracing.GPUValidation and wave operations are available.
static bool ShouldRunRayTracingGPUValidation()
{
	// Wave ops are required to run the ray tracing validation shaders.
	if (!(GRHISupportsWaveOperations && RHISupportsWaveOperations(GMaxRHIShaderPlatform)))
	{
		return false;
	}
	return GD3D12RayTracingGPUValidation != 0;
}
// Converts engine acceleration structure flags to the corresponding D3D12 build flags.
// Asserts if any engine flag is left over after all known flags have been translated.
static D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS TranslateRayTracingAccelerationStructureFlags(ERayTracingAccelerationStructureFlags Flags)
{
	uint32 NativeBits = 0;

	// Translates one flag: sets the native bit and clears the engine flag from Flags so leftovers can be detected.
	const auto TranslateFlag = [&NativeBits, &Flags](ERayTracingAccelerationStructureFlags EngineFlag, D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS NativeFlag)
	{
		if (!EnumHasAllFlags(Flags, EngineFlag))
		{
			return;
		}
		NativeBits |= static_cast<uint32>(NativeFlag);
		EnumRemoveFlags(Flags, EngineFlag);
	};

	TranslateFlag(ERayTracingAccelerationStructureFlags::AllowUpdate, D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_UPDATE);
	TranslateFlag(ERayTracingAccelerationStructureFlags::AllowCompaction, D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_COMPACTION);
	TranslateFlag(ERayTracingAccelerationStructureFlags::FastTrace, D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE);
	TranslateFlag(ERayTracingAccelerationStructureFlags::FastBuild, D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_BUILD);
	TranslateFlag(ERayTracingAccelerationStructureFlags::MinimizeMemory, D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_MINIMIZE_MEMORY);

	// Testing Flags against itself is true exactly when any bit is still set.
	checkf(!EnumHasAnyFlags(Flags, Flags), TEXT("Some ERayTracingAccelerationStructureFlags entries were not handled"));

	return static_cast<D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS>(NativeBits);
}
// Maps the engine geometry type to the D3D12 geometry type.
// Any other value asserts and yields D3D12_RAYTRACING_GEOMETRY_TYPE(0).
static D3D12_RAYTRACING_GEOMETRY_TYPE TranslateRayTracingGeometryType(ERayTracingGeometryType GeometryType)
{
	if (GeometryType == RTGT_Triangles)
	{
		return D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES;
	}
	if (GeometryType == RTGT_Procedural)
	{
		return D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS;
	}

	checkf(false, TEXT("Unexpected ray tracing geometry type"));
	return D3D12_RAYTRACING_GEOMETRY_TYPE(0);
}
struct FDXILLibrary
{
// No copy assignment or move because FDXILLibrary points to internal struct memory
UE_NONCOPYABLE(FDXILLibrary)
FDXILLibrary() = default;
void InitFromDXIL(const void* Bytecode, uint32 BytecodeLength, const LPCWSTR* InEntryNames, const LPCWSTR* InExportNames, uint32 NumEntryNames)
{
check(NumEntryNames != 0);
check(InEntryNames);
check(InExportNames);
EntryNames.SetNum(NumEntryNames);
ExportNames.SetNum(NumEntryNames);
ExportDesc.SetNum(NumEntryNames);
for (uint32 EntryIndex = 0; EntryIndex < NumEntryNames; ++EntryIndex)
{
EntryNames[EntryIndex] = InEntryNames[EntryIndex];
ExportNames[EntryIndex] = InExportNames[EntryIndex];
ExportDesc[EntryIndex].ExportToRename = *(EntryNames[EntryIndex]);
ExportDesc[EntryIndex].Flags = D3D12_EXPORT_FLAG_NONE;
ExportDesc[EntryIndex].Name = *(ExportNames[EntryIndex]);
}
Desc.DXILLibrary.pShaderBytecode = Bytecode;
Desc.DXILLibrary.BytecodeLength = BytecodeLength;
Desc.NumExports = ExportDesc.Num();
Desc.pExports = ExportDesc.GetData();
}
void InitFromDXIL(const D3D12_SHADER_BYTECODE& ShaderBytecode, LPCWSTR* InEntryNames, LPCWSTR* InExportNames, uint32 NumEntryNames)
{
InitFromDXIL(ShaderBytecode.pShaderBytecode, ShaderBytecode.BytecodeLength, InEntryNames, InExportNames, NumEntryNames);
}
D3D12_STATE_SUBOBJECT GetSubobject() const
{
D3D12_STATE_SUBOBJECT Subobject = {};
Subobject.Type = D3D12_STATE_SUBOBJECT_TYPE_DXIL_LIBRARY;
Subobject.pDesc = &Desc;
return Subobject;
}
// NOTE: typical DXIL library may contain up to 3 entry points (i.e. hit groups with closest hit, any hit and intersection shaders)
// Typical case is 1 (RGS, MS or CHS only) or 2 (CHS + AHS for shaders with alpha masking)
static constexpr uint32 ExpectedEntryPoints = 3;
TArray<D3D12_EXPORT_DESC, TInlineAllocator<ExpectedEntryPoints>> ExportDesc;
TArray<FString, TInlineAllocator<ExpectedEntryPoints>> EntryNames;
TArray<FString, TInlineAllocator<ExpectedEntryPoints>> ExportNames;
D3D12_DXIL_LIBRARY_DESC Desc = {};
};
// Assembles the subobject list for a D3D12 state object (full RTPSO or collection, per StateObjectType) and
// creates it on RayTracingDevice.
//
// Subobjects are emitted in this order: DXIL libraries, shader config, shader config association, hit groups,
// pipeline config, state object config, global root signature, local root signatures, one local root signature
// association per export, existing collections.
//
// Returns Result as filled in by CreateStateObject. On failure a warning is logged and Result is returned as-is
// (presumably null - confirm); device removed/hung results are additionally passed to VERIFYD3D12RESULT_EX.
static TRefCountPtr<ID3D12StateObject> CreateRayTracingStateObject(
ID3D12Device5* RayTracingDevice,
const TArrayView<const FDXILLibrary*>& ShaderLibraries,
const TArrayView<LPCWSTR>& Exports,
uint32 MaxAttributeSizeInBytes,
uint32 MaxPayloadSizeInBytes,
const TArrayView<const D3D12_HIT_GROUP_DESC>& HitGroups,
const ID3D12RootSignature* GlobalRootSignature,
const TArrayView<ID3D12RootSignature*>& LocalRootSignatures,
const TArrayView<uint32>& LocalRootSignatureAssociations, // indices into LocalRootSignatures, one per export (may be empty, which assumes single root signature used for everything)
const TArrayView<D3D12_EXISTING_COLLECTION_DESC>& ExistingCollections,
D3D12_STATE_OBJECT_TYPE StateObjectType // Full RTPSO or a Collection
)
{
checkf((LocalRootSignatureAssociations.Num() == 0 && LocalRootSignatures.Num() == 1)
|| (LocalRootSignatureAssociations.Num() == Exports.Num()),
TEXT("There must be exactly one local root signature association per export."));
TRefCountPtr<ID3D12StateObject> Result;
// There are several pipeline sub-objects that are always required:
// 1) D3D12_RAYTRACING_SHADER_CONFIG
// 2) D3D12_SUBOBJECT_TO_EXPORTS_ASSOCIATION
// 3) D3D12_RAYTRACING_PIPELINE_CONFIG
// 4) D3D12_STATE_OBJECT_CONFIG
// 5) Global root signature
static constexpr uint32 NumRequiredSubobjects = 5;
// The array is sized to its final count up front: associations below store pointers to its elements,
// and every slot is expected to be written exactly once (verified by the checkf after the last section).
TArray<D3D12_STATE_SUBOBJECT> Subobjects;
Subobjects.SetNumUninitialized(NumRequiredSubobjects
+ ShaderLibraries.Num()
+ HitGroups.Num()
+ LocalRootSignatures.Num()
+ Exports.Num()
+ ExistingCollections.Num()
);
// Backing storage for the per-export local root signature associations referenced by Subobjects.
TArray<D3D12_SUBOBJECT_TO_EXPORTS_ASSOCIATION> ExportAssociations;
ExportAssociations.SetNumUninitialized(Exports.Num());
// Index is the next free slot in Subobjects.
uint32 Index = 0;
const uint32 NumExports = Exports.Num();
// Shader libraries
for (const FDXILLibrary* Library : ShaderLibraries)
{
Subobjects[Index++] = Library->GetSubobject();
}
// Shader config
// The subobject stores the address of this local, which stays in scope until CreateStateObject is called below.
D3D12_RAYTRACING_SHADER_CONFIG ShaderConfig = {};
ShaderConfig.MaxAttributeSizeInBytes = MaxAttributeSizeInBytes;
check(ShaderConfig.MaxAttributeSizeInBytes <= RAY_TRACING_MAX_ALLOWED_ATTRIBUTE_SIZE);
ShaderConfig.MaxPayloadSizeInBytes = MaxPayloadSizeInBytes;
const uint32 ShaderConfigIndex = Index;
Subobjects[Index++] = D3D12_STATE_SUBOBJECT{ D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_SHADER_CONFIG, &ShaderConfig};
// Shader config association
// Associates the shader config subobject with every export.
D3D12_SUBOBJECT_TO_EXPORTS_ASSOCIATION ShaderConfigAssociation = {};
ShaderConfigAssociation.NumExports = Exports.Num();
ShaderConfigAssociation.pExports = Exports.GetData();
ShaderConfigAssociation.pSubobjectToAssociate = &Subobjects[ShaderConfigIndex];
Subobjects[Index++] = D3D12_STATE_SUBOBJECT{ D3D12_STATE_SUBOBJECT_TYPE_SUBOBJECT_TO_EXPORTS_ASSOCIATION, &ShaderConfigAssociation };
// Hit groups
for (const D3D12_HIT_GROUP_DESC& HitGroupDesc : HitGroups)
{
Subobjects[Index++] = D3D12_STATE_SUBOBJECT{ D3D12_STATE_SUBOBJECT_TYPE_HIT_GROUP, &HitGroupDesc };
}
// Pipeline config
D3D12_RAYTRACING_PIPELINE_CONFIG PipelineConfig = {};
PipelineConfig.MaxTraceRecursionDepth = RAY_TRACING_MAX_ALLOWED_RECURSION_DEPTH;
const uint32 PipelineConfigIndex = Index;
Subobjects[Index++] = D3D12_STATE_SUBOBJECT{ D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_PIPELINE_CONFIG, &PipelineConfig };
// State object config
// Allows later additions to the state object only when the RHI reports support for it.
D3D12_STATE_OBJECT_CONFIG StateObjectConfig = {};
if (GRHISupportsRayTracingPSOAdditions)
{
StateObjectConfig.Flags = D3D12_STATE_OBJECT_FLAG_ALLOW_STATE_OBJECT_ADDITIONS;
}
Subobjects[Index++] = D3D12_STATE_SUBOBJECT{ D3D12_STATE_SUBOBJECT_TYPE_STATE_OBJECT_CONFIG, &StateObjectConfig };
// Global root signature
// pDesc is the address of the GlobalRootSignature pointer parameter itself.
Subobjects[Index++] = D3D12_STATE_SUBOBJECT{ D3D12_STATE_SUBOBJECT_TYPE_GLOBAL_ROOT_SIGNATURE, &GlobalRootSignature };
// Local root signatures
const uint32 LocalRootSignatureBaseIndex = Index;
for (int32 SignatureIndex = 0; SignatureIndex < LocalRootSignatures.Num(); ++SignatureIndex)
{
checkf(LocalRootSignatures[SignatureIndex], TEXT("All local root signatures must be valid"));
Subobjects[Index++] = D3D12_STATE_SUBOBJECT{ D3D12_STATE_SUBOBJECT_TYPE_LOCAL_ROOT_SIGNATURE, &LocalRootSignatures[SignatureIndex] };
}
// Local root signature associations
for (int32 ExportIndex = 0; ExportIndex < Exports.Num(); ++ExportIndex)
{
// If custom LocalRootSignatureAssociations data is not provided, then assume same default local RS association.
const int32 LocalRootSignatureIndex = LocalRootSignatureAssociations.Num() != 0
? LocalRootSignatureAssociations[ExportIndex]
: 0;
D3D12_SUBOBJECT_TO_EXPORTS_ASSOCIATION& Association = ExportAssociations[ExportIndex];
Association = D3D12_SUBOBJECT_TO_EXPORTS_ASSOCIATION{};
Association.NumExports = 1;
Association.pExports = &Exports[ExportIndex];
check(LocalRootSignatureIndex < LocalRootSignatures.Num());
// Points at the local root signature subobject written in the loop above.
Association.pSubobjectToAssociate = &Subobjects[LocalRootSignatureBaseIndex + LocalRootSignatureIndex];
Subobjects[Index++] = D3D12_STATE_SUBOBJECT{ D3D12_STATE_SUBOBJECT_TYPE_SUBOBJECT_TO_EXPORTS_ASSOCIATION, &ExportAssociations[ExportIndex] };
}
// Existing collection objects
for (int32 CollectionIndex = 0; CollectionIndex < ExistingCollections.Num(); ++CollectionIndex)
{
Subobjects[Index++] = D3D12_STATE_SUBOBJECT{ D3D12_STATE_SUBOBJECT_TYPE_EXISTING_COLLECTION, &ExistingCollections[CollectionIndex] };
}
// Done!
checkf(Index == Subobjects.Num(), TEXT("All pipeline subobjects must be initialized."));
// Create ray tracing pipeline state object
D3D12_STATE_OBJECT_DESC Desc = {};
Desc.NumSubobjects = Index;
Desc.pSubobjects = &Subobjects[0];
Desc.Type = StateObjectType;
#if WITH_NVAPI
// NOTE(review): despite its name, bReorderingDisabled receives the result of the ensure on the "set slot" call,
// i.e. it appears to be true when the NVAPI extension slot was enabled successfully (assuming ensureMsgf
// yields the tested condition - confirm). It gates the matching reset after state object creation.
bool bReorderingDisabled = false;
if (IsRHIDeviceNVIDIA())
{
// Enable the NVAPI extension to allow shader extensions in ray tracing for shader execution reordering
NvAPI_Status NvapiStatus = NvAPI_D3D12_SetNvShaderExtnSlotSpaceLocalThread(RayTracingDevice, UE_HLSL_SLOT_NV_SHADER_EXTN, UE_HLSL_SPACE_NV_SHADER_EXTN);
bReorderingDisabled = ensureMsgf(NvapiStatus == NVAPI_OK, TEXT("NVAPI ERROR %d on setup extensions\n"), NvapiStatus);
}
#endif
HRESULT hr = RayTracingDevice->CreateStateObject(&Desc, IID_PPV_ARGS(Result.GetInitReference()));
if (FAILED(hr))
{
// Failed state objects are not fatal if it's coming from partial/non-required RTPSO
// Will be fatal when it's actually needed for rendering
UE_LOG(LogD3D12RHI, Warning, TEXT("Failed to create raytracing RTPSO - add '-d3ddebug' to the command line to get more info on RTPSO compilation errors from the debug device"));
// Catch GPU crashes explicitly.
if (hr == DXGI_ERROR_DEVICE_REMOVED || hr == DXGI_ERROR_DEVICE_HUNG)
{
VERIFYD3D12RESULT_EX(hr, RayTracingDevice);
}
}
#if WITH_NVAPI
if (bReorderingDisabled)
{
// Disable the NVAPI extension slot again after state object creation.
NvAPI_Status NvapiStatus = NvAPI_D3D12_SetNvShaderExtnSlotSpaceLocalThread(RayTracingDevice, ~0u, 0);
checkf(NvapiStatus == NVAPI_OK, TEXT("NVAPI ERROR %d on clear extensions\n"), NvapiStatus);
}
#endif
// Stats are incremented regardless of whether CreateStateObject succeeded.
INC_DWORD_STAT(STAT_D3D12RayTracingCreatedPipelines);
INC_DWORD_STAT_BY(STAT_D3D12RayTracingCompiledShaders, NumExports);
return Result;
}
// Returns a 64-bit value made of the first sizeof(uint64) bytes of the shader's hash (the shader SHA1).
inline uint64 GetShaderHash64(FRHIRayTracingShader* ShaderRHI)
{
	uint64 Hash64 = 0;
	FMemory::Memcpy(&Hash64, ShaderRHI->GetHash().Hash, sizeof(uint64));
	return Hash64;
}
// Builds a stable symbol name for a ray tracing shader, used for RT PSO creation.
// The result is "<Prefix>_<Hash>", with the hash printed as 16 zero-padded lowercase hex digits.
inline FString GenerateShaderName(const TCHAR* Prefix, uint64 Hash)
{
	FString ShaderName = FString::Printf(TEXT("%s_%016llx"), Prefix, Hash);
	return ShaderName;
}
// Builds the symbol name of a shader from its entry point and the 64-bit hash derived from its shader hash.
inline FString GenerateShaderName(FRHIRayTracingShader* ShaderRHI)
{
	const FD3D12RayTracingShader* D3D12Shader = FD3D12DynamicRHI::ResourceCast(ShaderRHI);
	const uint64 Hash64 = GetShaderHash64(ShaderRHI);
	return GenerateShaderName(*(D3D12Shader->EntryPoint), Hash64);
}
// Looks up the shader identifier of ExportName in the given pipeline properties.
// Asserts if the export cannot be found.
static FD3D12ShaderIdentifier GetShaderIdentifier(ID3D12StateObjectProperties* PipelineProperties, const TCHAR* ExportName)
{
	const void* ShaderIdData = PipelineProperties->GetShaderIdentifier(ExportName);
	checkf(ShaderIdData, TEXT("Couldn't find requested export in the ray tracing shader pipeline"));

	FD3D12ShaderIdentifier Identifier;
	Identifier.SetData(ShaderIdData);
	return Identifier;
}
// Looks up the shader identifier of ExportName in a state object by first querying its properties interface.
// Asserts if the interface query fails.
static FD3D12ShaderIdentifier GetShaderIdentifier(ID3D12StateObject* StateObject, const TCHAR* ExportName)
{
	TRefCountPtr<ID3D12StateObjectProperties> Properties;
	const HRESULT QueryInterfaceResult = StateObject->QueryInterface(IID_PPV_ARGS(Properties.GetInitReference()));
	checkf(SUCCEEDED(QueryInterfaceResult), TEXT("Failed to query pipeline properties from the ray tracing pipeline state object. Result=%08x"), QueryInterfaceResult);

	// Forward to the overload that works on the properties interface.
	return GetShaderIdentifier(Properties, ExportName);
}
// Creates the GPU buffer that receives post-build info (one uint64 per batched compaction request)
// and the staging buffer used to read it back.
FD3D12RayTracingCompactionRequestHandler::FD3D12RayTracingCompactionRequestHandler(FD3D12Device* Device)
	: FD3D12DeviceChild(Device)
{
	// One uint64 slot for each request that can be processed in a single batch.
	const size_t PostBuildInfoBytes = sizeof(uint64) * GD3D12RayTracingMaxBatchedCompaction;

	const D3D12_RESOURCE_DESC BufferResourceDesc = CD3DX12_RESOURCE_DESC::Buffer(PostBuildInfoBytes, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);

	// The buffer is written as a UAV and later used as a copy source for the readback.
	const FRHIBufferCreateDesc BufferCreateDesc =
		FRHIBufferCreateDesc::Create(TEXT("PostBuildInfoBuffer"), PostBuildInfoBytes, 0, BUF_UnorderedAccess | BUF_SourceCopy)
		.SetInitialState(ERHIAccess::UAVMask)
		.SetGPUMask(FRHIGPUMask::FromIndex(GetParentDevice()->GetGPUIndex()));

	PostBuildInfoBuffer = GetParentDevice()->GetParentAdapter()->CreateRHIBuffer(
		BufferResourceDesc,
		8,
		BufferCreateDesc,
		ED3D12ResourceStateMode::MultiState,
		D3D12_RESOURCE_STATE_UNORDERED_ACCESS
	);
	SetD3D12ResourceName(PostBuildInfoBuffer->GetResource(), TEXT("PostBuildInfoBuffer"));

	PostBuildInfoStagingBuffer = RHICreateStagingBuffer();
}
// Queues a geometry for compaction. It is picked up by a later Update() call.
// The geometry must have an acceleration structure buffer on this device's GPU and must have been built with
// AllowCompaction and FastTrace but without AllowUpdate.
void FD3D12RayTracingCompactionRequestHandler::RequestCompact(FD3D12RayTracingGeometry* InRTGeometry)
{
	const uint32 GPUIndex = GetParentDevice()->GetGPUIndex();
	check(InRTGeometry->AccelerationStructureBuffers[GPUIndex]);

	const ERayTracingAccelerationStructureFlags GeometryBuildFlags = GetRayTracingAccelerationStructureBuildFlags(InRTGeometry->Initializer);
	check(EnumHasAllFlags(GeometryBuildFlags, ERayTracingAccelerationStructureFlags::AllowCompaction) &&
		EnumHasAllFlags(GeometryBuildFlags, ERayTracingAccelerationStructureFlags::FastTrace) &&
		!EnumHasAnyFlags(GeometryBuildFlags, ERayTracingAccelerationStructureFlags::AllowUpdate));

	// The pending list is shared with Update() and ReleaseRequest(), so it is only touched under the lock.
	FScopeLock ScopedLock(&CS);
	PendingRequests.Add(InRTGeometry);
}
// Cancels any compaction request for the given geometry.
// Returns true if a pending or active request was found and released, false otherwise.
bool FD3D12RayTracingCompactionRequestHandler::ReleaseRequest(FD3D12RayTracingGeometry* InRTGeometry)
{
	FScopeLock ScopedLock(&CS);

	// Still waiting in the pending list: removing it there is all that is needed.
	if (PendingRequests.Remove(InRTGeometry) > 0)
	{
		return true;
	}

	// Otherwise it may be part of the active batch; clear its slot so the request is not handled anymore.
	const int32 NumActive = ActiveBLASGPUAddresses.Num();
	for (int32 Slot = 0; Slot < NumActive; ++Slot)
	{
		if (ActiveRequests[Slot] != InRTGeometry)
		{
			continue;
		}
		ActiveRequests[Slot] = nullptr;
		return true;
	}

	return false;
}
// Advances batched BLAS compaction by one step, under the handler's lock:
//  1) If a batch is active, wait (by returning early) until its readback sync point is complete, then read the
//     compacted sizes from the staging buffer and call CompactAccelerationStructure on each still-valid request.
//  2) Move up to GD3D12RayTracingMaxBatchedCompaction pending requests into a new active batch and record the
//     commands that emit their compacted-size post-build info and copy it to the staging buffer.
void FD3D12RayTracingCompactionRequestHandler::Update(FD3D12CommandContext& Context)
{
LLM_SCOPE_BYNAME(TEXT("FD3D12RT/Compaction"));
FScopeLock Lock(&CS);
// process previous build request data retrieval
uint32 GPUIndex = GetParentDevice()->GetGPUIndex();
if (ActiveBLASGPUAddresses.Num() > 0)
{
// Ensure that our builds & copies have finished on GPU when enqueued - if still busy then wait until done
// "Wait" here means returning without doing anything; no new batch is started while one is in flight.
if (PostBuildInfoBufferReadbackSyncPoint && !PostBuildInfoBufferReadbackSyncPoint->IsComplete())
{
return;
}
// Readback the sizes from the readback buffer and schedule new builds ops on the RTGeometry objects
uint64* SizesAfterCompaction = (uint64*)PostBuildInfoStagingBuffer->Lock(0, ActiveBLASGPUAddresses.Num() * sizeof(uint64));
for (int32 BLASIndex = 0; BLASIndex < ActiveBLASGPUAddresses.Num(); ++BLASIndex)
{
// Slots cleared by ReleaseRequest() are null and are skipped.
if (ActiveRequests[BLASIndex] != nullptr)
{
ActiveRequests[BLASIndex]->CompactAccelerationStructure(Context, GPUIndex, SizesAfterCompaction[BLASIndex]);
}
}
PostBuildInfoStagingBuffer->Unlock();
// reset working values
PostBuildInfoBufferReadbackSyncPoint = nullptr;
ActiveRequests.Empty(ActiveRequests.Num());
ActiveBLASGPUAddresses.Empty(ActiveBLASGPUAddresses.Num());
}
// build a new set of build requests to extract the build data
// ActiveRequests and ActiveBLASGPUAddresses are filled in lockstep, so index i refers to the same request in both.
for (FD3D12RayTracingGeometry* RTGeometry : PendingRequests)
{
ActiveRequests.Add(RTGeometry);
FD3D12ResourceLocation& ResourceLocation = RTGeometry->AccelerationStructureBuffers[GPUIndex].GetReference()->ResourceLocation;
ActiveBLASGPUAddresses.Add(ResourceLocation.GetGPUVirtualAddress());
Context.UpdateResidency(ResourceLocation.GetResource());
// enqueued enough requests for this update round
if (ActiveRequests.Num() >= GD3D12RayTracingMaxBatchedCompaction)
{
break;
}
}
// Do we have requests?
if (ActiveRequests.Num() > 0)
{
// clear out all of the pending requests, don't allow the array to shrink
// Only the first ActiveRequests.Num() entries are removed - exactly those promoted by the loop above.
PendingRequests.RemoveAt(0, ActiveRequests.Num(), EAllowShrinking::No);
D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC PostBuildInfoDesc = {};
PostBuildInfoDesc.DestBuffer = PostBuildInfoBuffer->ResourceLocation.GetGPUVirtualAddress();
PostBuildInfoDesc.InfoType = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_COMPACTED_SIZE;
//PostBuildInfoBuffer enters in D3D12_RESOURCE_STATE_UNORDERED_ACCESS
//Context.TransitionResource(PostBuildInfoBuffer->GetResource(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0);
// Force UAV barrier to make sure all previous builds ops are finished
Context.AddUAVBarrier();
Context.FlushResourceBarriers();
// Emit the RT post build info from the selected requests
Context.RayTracingCommandList()->EmitRaytracingAccelerationStructurePostbuildInfo(&PostBuildInfoDesc, ActiveBLASGPUAddresses.Num(), ActiveBLASGPUAddresses.GetData());
// Transition to copy source and perform the copy to readback
Context.TransitionResource(PostBuildInfoBuffer->GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE, 0);
Context.FlushResourceBarriers();
Context.RHICopyToStagingBuffer(PostBuildInfoBuffer, PostBuildInfoStagingBuffer, 0, sizeof(uint64) * ActiveBLASGPUAddresses.Num());
// Return the buffer to the UAV state it is expected to be in on the next round.
Context.TransitionResource(PostBuildInfoBuffer->GetResource(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0);
// Update the sync point
PostBuildInfoBufferReadbackSyncPoint = Context.GetContextSyncPoint();
}
}
// Cache for ray tracing pipeline collection objects, containing single shaders that can be linked into full pipelines.
class FD3D12RayTracingPipelineCache : FD3D12AdapterChild
{
public:
UE_NONCOPYABLE(FD3D12RayTracingPipelineCache)
FD3D12RayTracingPipelineCache(FD3D12Adapter* Adapter)
: FD3D12AdapterChild(Adapter)
, DefaultLocalRootSignature(Adapter)
{
// Default empty local root signature
LLM_SCOPE_BYNAME(TEXT("FD3D12RT/PipelineCache"));
D3D12_VERSIONED_ROOT_SIGNATURE_DESC LocalRootSignatureDesc = {};
if (GetParentAdapter()->GetRootSignatureVersion() >= D3D_ROOT_SIGNATURE_VERSION_1_1)
{
LocalRootSignatureDesc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1;
LocalRootSignatureDesc.Desc_1_1.Flags |= D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE;
}
else
{
LocalRootSignatureDesc.Version = D3D_ROOT_SIGNATURE_VERSION_1_0;
LocalRootSignatureDesc.Desc_1_0.Flags |= D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE;
}
DefaultLocalRootSignature.Init(LocalRootSignatureDesc, RS_RayTracingLocal);
}
~FD3D12RayTracingPipelineCache()
{
Reset();
}
struct FKey
{
uint64 ShaderHash = 0;
uint32 MaxAttributeSizeInBytes = 0;
uint32 MaxPayloadSizeInBytes = 0;
ID3D12RootSignature* GlobalRootSignature = nullptr;
ID3D12RootSignature* LocalRootSignature = nullptr;
bool operator == (const FKey& Other) const
{
return ShaderHash == Other.ShaderHash
&& MaxAttributeSizeInBytes == Other.MaxAttributeSizeInBytes
&& MaxPayloadSizeInBytes == Other.MaxPayloadSizeInBytes
&& GlobalRootSignature == Other.GlobalRootSignature
&& LocalRootSignature == Other.LocalRootSignature;
}
inline friend uint32 GetTypeHash(const FKey& Key)
{
return Key.ShaderHash;
}
};
enum class ECollectionType
{
Unknown,
RayGen,
Miss,
HitGroup,
Callable,
};
struct FEntry
{
// Move-only type
FEntry() = default;
FEntry(FEntry&& Other) = default;
FEntry(const FEntry&) = delete;
FEntry& operator = (const FEntry&) = delete;
FEntry& operator = (FEntry&& Other) = delete;
D3D12_EXISTING_COLLECTION_DESC GetCollectionDesc()
{
check(bDeserialized || (CompileEvent.IsValid() && CompileEvent->IsComplete()));
check(StateObject);
if (StateObject == nullptr)
{
UE_LOG(LogD3D12RHI, Fatal, TEXT("Trying to add a raytracing state collection which failed to compile to a RTPSO collection."));
}
D3D12_EXISTING_COLLECTION_DESC Result = {};
Result.pExistingCollection = StateObject;
return Result;
}
const TCHAR* GetPrimaryExportNameChars()
{
checkf(ExportNames.Num()!=0, TEXT("This ray tracing shader collection does not export any symbols."));
return *(ExportNames[0]);
}
ECollectionType CollectionType = ECollectionType::Unknown;
TRefCountPtr<FD3D12RayTracingShader> Shader;
TRefCountPtr<ID3D12StateObject> StateObject;
FD3D12RayTracingPipelineInfo PipelineInfo;
FGraphEventRef CompileEvent;
bool bDeserialized = false;
static constexpr uint32 MaxExports = 4;
TArray<FString, TFixedAllocator<MaxExports>> ExportNames;
FD3D12ShaderIdentifier Identifier;
float CompileTimeMS = 0.0f;
};
static const TCHAR* GetCollectionTypeName(ECollectionType Type)
{
switch (Type)
{
case ECollectionType::Unknown:
return TEXT("Unknown");
case ECollectionType::RayGen:
return TEXT("RayGen");
case ECollectionType::Miss:
return TEXT("Miss");
case ECollectionType::HitGroup:
return TEXT("HitGroup");
case ECollectionType::Callable:
return TEXT("Callable");
default:
return TEXT("");
}
}
class FShaderCompileTask
{
public:
UE_NONCOPYABLE(FShaderCompileTask)
FShaderCompileTask(
FEntry& InEntry,
FKey InCacheKey,
FD3D12Device* InDevice,
ECollectionType InCollectionType,
bool bInRequired)
: Entry(InEntry)
, CacheKey(InCacheKey)
, Device(InDevice)
, RayTracingDevice(InDevice->GetDevice5())
, CollectionType(InCollectionType)
, bRequired(bInRequired)
{
}
static ESubsequentsMode::Type GetSubsequentsMode() { return ESubsequentsMode::TrackSubsequents; }
void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
{
SCOPE_CYCLE_COUNTER(STAT_RTPSO_CompileShader);
TRACE_CPUPROFILER_EVENT_SCOPE(ShaderCompileTask);
uint64 CompileTimeCycles = 0;
CompileTimeCycles -= FPlatformTime::Cycles64();
FD3D12RayTracingShader* Shader = Entry.Shader;
static constexpr uint32 MaxEntryPoints = 3; // CHS+AHS+IS for HitGroup or just a single entry point for other collection types
TArray<LPCWSTR, TFixedAllocator<MaxEntryPoints>> OriginalEntryPoints;
TArray<LPCWSTR, TFixedAllocator<MaxEntryPoints>> RenamedEntryPoints;
const uint32 NumHitGroups = CollectionType == ECollectionType::HitGroup ? 1 : 0;
const uint64 ShaderHash = CacheKey.ShaderHash;
ID3D12RootSignature* GlobalRootSignature = CacheKey.GlobalRootSignature;
ID3D12RootSignature* LocalRootSignature = CacheKey.LocalRootSignature;
const uint32 DefaultLocalRootSignatureIndex = 0;
uint32 MaxAttributeSizeInBytes = CacheKey.MaxAttributeSizeInBytes;
uint32 MaxPayloadSizeInBytes = CacheKey.MaxPayloadSizeInBytes;
D3D12_HIT_GROUP_DESC HitGroupDesc = {};
if (CollectionType == ECollectionType::HitGroup)
{
HitGroupDesc.HitGroupExport = Entry.GetPrimaryExportNameChars();
HitGroupDesc.Type = Shader->IntersectionEntryPoint.IsEmpty() ? D3D12_HIT_GROUP_TYPE_TRIANGLES : D3D12_HIT_GROUP_TYPE_PROCEDURAL_PRIMITIVE;
{
const FString& ExportName = Entry.ExportNames.Add_GetRef(GenerateShaderName(TEXT("CHS"), ShaderHash));
HitGroupDesc.ClosestHitShaderImport = *ExportName;
OriginalEntryPoints.Add(*(Shader->EntryPoint));
RenamedEntryPoints.Add(*ExportName);
}
if (!Shader->AnyHitEntryPoint.IsEmpty())
{
const FString& ExportName = Entry.ExportNames.Add_GetRef(GenerateShaderName(TEXT("AHS"), ShaderHash));
HitGroupDesc.AnyHitShaderImport = *ExportName;
OriginalEntryPoints.Add(*(Shader->AnyHitEntryPoint));
RenamedEntryPoints.Add(*ExportName);
}
if (!Shader->IntersectionEntryPoint.IsEmpty())
{
const FString& ExportName = Entry.ExportNames.Add_GetRef(GenerateShaderName(TEXT("IS"), ShaderHash));
HitGroupDesc.IntersectionShaderImport = *ExportName;
OriginalEntryPoints.Add(*(Shader->IntersectionEntryPoint));
RenamedEntryPoints.Add(*ExportName);
}
}
else
{
checkf(CollectionType == ECollectionType::Miss || CollectionType == ECollectionType::RayGen || CollectionType == ECollectionType::Callable, TEXT("Unexpected RT shader collection type"));
OriginalEntryPoints.Add(*(Shader->EntryPoint));
RenamedEntryPoints.Add(Entry.GetPrimaryExportNameChars());
}
// Validate that memory reservation was correct
check(Entry.ExportNames.Num() <= Entry.MaxExports);
FDXILLibrary Library;
Library.InitFromDXIL(Shader->GetShaderBytecode(), OriginalEntryPoints.GetData(), RenamedEntryPoints.GetData(), OriginalEntryPoints.Num());
const FDXILLibrary* LibraryPtr = &Library;
Entry.StateObject = CreateRayTracingStateObject(
RayTracingDevice,
MakeArrayView(&LibraryPtr, 1),
RenamedEntryPoints,
MaxAttributeSizeInBytes,
MaxPayloadSizeInBytes,
MakeArrayView(&HitGroupDesc, NumHitGroups),
GlobalRootSignature,
MakeArrayView(&LocalRootSignature, 1),
{}, // LocalRootSignatureAssociations (single RS will be used for all exports since this is null)
{}, // ExistingCollections
D3D12_STATE_OBJECT_TYPE_COLLECTION);
if (Entry.StateObject)
{
Device->GetRayTracingPipelineInfo(Entry.StateObject, &Entry.PipelineInfo);
// Retrieve the identifier from the library
Entry.Identifier = GetShaderIdentifier(Entry.StateObject, Entry.GetPrimaryExportNameChars());
}
else if (bRequired)
{
UE_LOG(LogD3D12RHI, Fatal, TEXT("Failed to create a required RTPSO pipeline state collection"));
}
CompileTimeCycles += FPlatformTime::Cycles64();
Entry.CompileTimeMS = float(FPlatformTime::ToMilliseconds64(CompileTimeCycles));
if (Entry.CompileTimeMS >= 1000.0f)
{
// Log compilations of individual shaders that took more than 1 second
UE_LOG(LogD3D12RHI, Log, TEXT("Compiled %s for RTPSO in %.2f ms."), OriginalEntryPoints[0], Entry.CompileTimeMS);
}
}
FORCEINLINE TStatId GetStatId() const
{
return GET_STATID(STAT_RTPSO_CompileShader);
}
ENamedThreads::Type GetDesiredThread()
{
return ENamedThreads::AnyHiPriThreadHiPriTask;
}
FEntry& Entry;
FKey CacheKey;
FD3D12Device* Device;
ID3D12Device5* RayTracingDevice;
ECollectionType CollectionType;
bool bRequired;
};
FEntry* GetOrCompileShader(
FD3D12Device* Device,
FD3D12RayTracingShader* Shader,
ID3D12RootSignature* GlobalRootSignature,
uint32 MaxAttributeSizeInBytes,
uint32 MaxPayloadSizeInBytes,
bool bRequired,
ECollectionType CollectionType,
FGraphEventArray& CompletionList,
bool* bOutCacheHit = nullptr)
{
FScopeLock Lock(&CriticalSection);
const uint64 ShaderHash = GetShaderHash64(Shader);
ID3D12RootSignature* LocalRootSignature = nullptr;
if (CollectionType == ECollectionType::RayGen)
{
// RayGen shaders use a default empty local root signature as all their resources bound via global RS.
LocalRootSignature = DefaultLocalRootSignature.GetRootSignature();
}
else
{
// All other shaders (hit groups, miss, callable) use custom root signatures.
LocalRootSignature = Shader->LocalRootSignature->GetRootSignature();
}
FKey CacheKey;
CacheKey.ShaderHash = ShaderHash;
CacheKey.MaxAttributeSizeInBytes = MaxAttributeSizeInBytes;
CacheKey.MaxPayloadSizeInBytes = MaxPayloadSizeInBytes;
CacheKey.GlobalRootSignature = GlobalRootSignature;
CacheKey.LocalRootSignature = LocalRootSignature;
FEntry*& FindResult = Cache.FindOrAdd(CacheKey);
if (FindResult)
{
if (bOutCacheHit) *bOutCacheHit = true;
}
else
{
if (bOutCacheHit) *bOutCacheHit = false;
if (FindResult == nullptr)
{
FindResult = new FEntry;
}
FEntry& Entry = *FindResult;
Entry.CollectionType = CollectionType;
Entry.Shader = Shader;
if (Shader->bPrecompiledPSO)
{
D3D12_SHADER_BYTECODE Bytecode = Shader->GetShaderBytecode();
Entry.StateObject = Device->DeserializeRayTracingStateObject(Bytecode, GlobalRootSignature);
if (Entry.StateObject)
{
Device->GetRayTracingPipelineInfo(Entry.StateObject, &Entry.PipelineInfo);
}
checkf(Entry.StateObject != nullptr, TEXT("Failed to deserialize RTPSO"));
Entry.ExportNames.Add(Shader->EntryPoint);
Entry.Identifier = GetShaderIdentifier(Entry.StateObject, *Shader->EntryPoint);
Entry.bDeserialized = true;
}
else
{
// Generate primary export name, which is immediately required on the PSO creation thread.
Entry.ExportNames.Add(GenerateShaderName(GetCollectionTypeName(CollectionType), ShaderHash));
checkf(Entry.ExportNames.Num() == 1, TEXT("Primary export name must always be first."));
// Defer actual compilation to another task, as there may be many shaders that may be compiled in parallel.
// Result of the compilation (the collection PSO) is not needed until final RT PSO is linked.
Entry.CompileEvent = TGraphTask<FShaderCompileTask>::CreateTask().ConstructAndDispatchWhenReady(
Entry,
CacheKey,
Device,
CollectionType,
bRequired
);
}
}
if (FindResult->CompileEvent.IsValid())
{
if (!FindResult->CompileEvent->IsComplete())
{
CompletionList.Add(FindResult->CompileEvent);
}
}
else
{
// required PSOs collection are fatal if not valid (can have failed during precaching of this
// RTPSO collection and wasn't fatal during precaching, but did fill the cache)
if (bRequired && FindResult->StateObject == nullptr)
{
UE_LOG(LogD3D12RHI, Fatal, TEXT("Failed to create a required RTPSO collection"));
}
}
return FindResult;
}
void Reset()
{
FScopeLock Lock(&CriticalSection);
for (auto It : Cache)
{
delete It.Value;
}
Cache.Reset();
}
ID3D12RootSignature* GetGlobalRootSignature(const FRHIShaderBindingLayout& ShaderBindingLayout)
{
FD3D12Adapter* Adapter = GetParentAdapter();
const FD3D12RootSignature* RootSignature = Adapter->GetGlobalRayTracingRootSignature(ShaderBindingLayout);
return RootSignature->GetRootSignature();
}
private:
FCriticalSection CriticalSection;
TMap<FKey, FEntry*> Cache;
FD3D12RootSignature DefaultLocalRootSignature; // Default empty root signature used for default hit shaders.
};
// Reports whether the bindless configuration of the adapter's first device enables bindless for ray tracing.
// Always false when the platform is built without bindless rendering support.
inline bool AreBindlessResourcesEnabledForRayTracing(FD3D12Adapter* Adapter)
{
#if PLATFORM_SUPPORTS_BINDLESS_RENDERING
	return IsBindlessEnabledForRayTracing(Adapter->GetDevice(0)->GetBindlessDescriptorManager().GetConfiguration())
		? true
		: false;
#else
	return false;
#endif
}
// Helper class used to manage SBT buffer for a specific GPU
class FD3D12RayTracingShaderBindingTableInternal
{
private:
// Copies InDataSize bytes from InData into the CPU-side table storage at byte offset WriteOffset.
void WriteData(uint32 WriteOffset, const void* InData, uint32 InDataSize)
{
#if DO_CHECK && DO_GUARD_SLOW
	// Validate both the first and the last destination byte.
	const uint32 LastByteOffset = WriteOffset + InDataSize - 1;
	Data.RangeCheck(WriteOffset);
	Data.RangeCheck(LastByteOffset);
#endif // DO_CHECK && DO_GUARD_SLOW

	auto* const Destination = Data.GetData() + WriteOffset;
	FMemory::Memcpy(Destination, InData, InDataSize);
}
// Raises an ensure when the InDataSize bytes stored at byte offset Offset differ from InData.
void CompareData(uint32 Offset, const void* InData, uint32 InDataSize)
{
	auto* const TableBytes = Data.GetData();
	const void* const CurrentData = TableBytes + Offset;
	ensure(FMemory::Memcmp(CurrentData, InData, InDataSize) == 0);
}
// Writes InDataSize bytes into record RecordIndex of the table that starts at ShaderTableOffset,
// at byte OffsetWithinRecord inside that record.
void WriteLocalShaderRecord(uint32 ShaderTableOffset, uint32 RecordIndex, uint32 OffsetWithinRecord, const void* InData, uint32 InDataSize)
{
	checkfSlow(OffsetWithinRecord % 4 == 0, TEXT("SBT record parameters must be written on DWORD-aligned boundary"));
	checkfSlow(InDataSize % 4 == 0, TEXT("SBT record parameters must be DWORD-aligned"));
	checkfSlow(OffsetWithinRecord + InDataSize <= LocalRecordSizeUnaligned, TEXT("SBT record write request is out of bounds"));

	// Records are laid out back to back, LocalRecordStride bytes apart.
	const uint32 RecordStart = ShaderTableOffset + LocalRecordStride * RecordIndex;
	WriteData(RecordStart + OffsetWithinRecord, InData, InDataSize);
}
// Compares InDataSize bytes of record RecordIndex (starting at OffsetWithinRecord) against InData.
void CompareLocalShaderRecord(uint32 ShaderTableOffset, uint32 RecordIndex, uint32 OffsetWithinRecord, const void* InData, uint32 InDataSize)
{
	const uint32 RecordStart = ShaderTableOffset + LocalRecordStride * RecordIndex;
	CompareData(RecordStart + OffsetWithinRecord, InData, InDataSize);
}
public:
UE_NONCOPYABLE(FD3D12RayTracingShaderBindingTableInternal)
// Ray tracing shader bindings can be processed in parallel.
// Each concurrent worker gets its own dedicated descriptor cache instance to avoid contention or locking.
// Scaling beyond 5 total threads does not yield any speedup in practice.
// This constant is the worker count passed to the explicit descriptor cache and the bound of the
// per-worker loops over WorkerData in Commit().
static constexpr uint32 MaxBindingWorkers = 5; // RHI thread + 4 parallel workers.
// Builds the CPU-side shader binding table for one device from the initializer:
// record counts, record stride, optional descriptor cache, table layout (hit groups, callables, misses)
// and the inline geometry parameter storage.
FD3D12RayTracingShaderBindingTableInternal(FRHICommandListBase& RHICmdList, const FRayTracingShaderBindingTableInitializer& Initializer, FD3D12Device* InDevice)
	: UniqueId(NextUniqueId++)
{
	checkf(Initializer.LocalBindingDataSize <= 4096, TEXT("The maximum size of a local root signature is 4KB.")); // as per section 4.22.1 of DXR spec v1.0
	check(Initializer.ShaderBindingMode != ERayTracingShaderBindingMode::Disabled);

	// Without hit group indexing a single hit record is used.
	uint32 HitSlotCount = 1;
	if (Initializer.HitGroupIndexingMode == ERayTracingHitGroupIndexingMode::Allow)
	{
		HitSlotCount = Initializer.NumGeometrySegments * Initializer.NumShaderSlotsPerGeometrySegment;
	}

	Device = InDevice;
	Lifetime = Initializer.Lifetime;
	HitGroupIndexingMode = Initializer.HitGroupIndexingMode;
	ShaderBindingMode = Initializer.ShaderBindingMode;
	NumShaderSlotsPerGeometrySegment = Initializer.NumShaderSlotsPerGeometrySegment;
	NumMissRecords = Initializer.NumMissShaderSlots;
	NumHitRecords = HitSlotCount;
	NumCallableRecords = Initializer.NumCallableShaderSlots;

	if (EnumHasAnyFlags(ShaderBindingMode, ERayTracingShaderBindingMode::RTPSO))
	{
		checkf(Initializer.LocalBindingDataSize >= sizeof(FD3D12HitGroupSystemParameters), TEXT("All local root signatures are expected to contain ray tracing system root parameters"));

		// Each record is a shader identifier followed by the local binding data, padded to the record alignment.
		LocalRecordSizeUnaligned = ShaderIdentifierSize + Initializer.LocalBindingDataSize;
		LocalRecordStride = RoundUpToNextMultiple(LocalRecordSizeUnaligned, D3D12_RAYTRACING_SHADER_RECORD_BYTE_ALIGNMENT);

		// Custom descriptor cache is only required when local resources may be bound.
		// If only global resources are used, then transient descriptor cache can be used.
		const bool bWantsDescriptorCache = (HitSlotCount + Initializer.NumCallableShaderSlots + Initializer.NumMissShaderSlots) * Initializer.LocalBindingDataSize != 0;
		if (bWantsDescriptorCache)
		{
			// #dxr_todo UE-72158: Remove this when RT descriptors are sub-allocated from the global view descriptor heap.
			if (GD3D12ExplicitViewDescriptorHeapOverflowReported)
			{
				// An overflow was reported earlier: double the heap size and clear the report.
				GD3D12ExplicitViewDescriptorHeapSize *= 2;
				GD3D12ExplicitViewDescriptorHeapOverflowReported = 0;
			}

			// D3D12 is guaranteed to support 1M (D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1) descriptors in a CBV/SRV/UAV heap, so clamp the size to this.
			// https://docs.microsoft.com/en-us/windows/desktop/direct3d12/hardware-support
			const uint32 ViewDescriptorCount = FMath::Min(D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1, GD3D12ExplicitViewDescriptorHeapSize);
			const uint32 SamplerDescriptorCount = D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE;

			DescriptorCache = new FD3D12ExplicitDescriptorCache(Device, MaxBindingWorkers);
			DescriptorCache->Init(0, ViewDescriptorCount, SamplerDescriptorCount, ERHIBindlessConfiguration::RayTracing);
		}

		// Lay out the three tables back to back; every table ends on the shader table alignment.
		uint32 RunningSize = 0;
		const auto AppendTable = [this, &RunningSize](uint32 NumRecords) -> uint32
		{
			const uint32 TableOffset = RunningSize;
			RunningSize += NumRecords * LocalRecordStride;
			RunningSize = RoundUpToNextMultiple(RunningSize, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT);
			return TableOffset;
		};

		HitGroupShaderTableOffset = AppendTable(HitSlotCount);
		CallableShaderTableOffset = AppendTable(Initializer.NumCallableShaderSlots);
		MissShaderTableOffset = AppendTable(Initializer.NumMissShaderSlots);

		Data.SetNumZeroed(RunningSize);

#if DO_CHECK
		bWasDefaultMissShaderSet = false;
#endif

		// Start with null identifiers in the default hit group, every miss and every callable record.
		SetDefaultHitGroupIdentifier(FD3D12ShaderIdentifier::Null);
		SetDefaultMissShaderIdentifier(FD3D12ShaderIdentifier::Null);
		SetDefaultCallableShaderIdentifier(FD3D12ShaderIdentifier::Null);

		// Keep CPU-side data after upload
		Data.SetAllowCPUAccess(true);
	}

	const bool bHasInlineBindings = EnumHasAnyFlags(ShaderBindingMode, ERayTracingShaderBindingMode::Inline);
	if (bHasInlineBindings && Initializer.NumGeometrySegments > 0)
	{
		// Doesn't make sense to have inline SBT without hitgroup indexing
		check(HitGroupIndexingMode == ERayTracingHitGroupIndexingMode::Allow);

		// One system parameter block per geometry segment.
		const uint32 InlineParameterBytes = Initializer.NumGeometrySegments * sizeof(FD3D12HitGroupSystemParameters);
		InlineGeometryParameterData.SetNumZeroed(InlineParameterBytes);
	}

	if (Lifetime == ERayTracingShaderBindingTableLifetime::Persistent)
	{
		HitRecordData.SetNum(NumHitRecords);
	}
}
// Clears hit record data up to the highest used record index tracked by worker 0, then releases the
// descriptor cache and, when constant buffer views are in use, every worker's loose parameter CBVs.
~FD3D12RayTracingShaderBindingTableInternal()
{
	// Might be slow - iterate faster somehow?
	int32 RecordIndex = 0;
	while (RecordIndex <= WorkerData[0].MaxUsedHitRecordIndex)
	{
		ClearHitRecordData(0, RecordIndex);
		++RecordIndex;
	}

	delete DescriptorCache;

#if D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
	for (FWorkerThreadData& PerWorker : WorkerData)
	{
		for (FD3D12ConstantBufferView* View : PerWorker.LooseParameterCBVs)
		{
			delete View;
		}
	}
#endif // D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
}
// Typed overload: stores Parameters in the local root signature area of a record.
// The shader identifier occupies the start of every record, so the write is shifted past it.
template <typename T>
void SetLocalShaderParameters(uint32 ShaderTableOffset, uint32 RecordIndex, uint32 InOffsetWithinRootSignature, const T& Parameters)
{
	const uint32 OffsetWithinRecord = ShaderIdentifierSize + InOffsetWithinRootSignature;
	WriteLocalShaderRecord(ShaderTableOffset, RecordIndex, OffsetWithinRecord, &Parameters, sizeof(Parameters));
}
// Raw overload: stores InDataSize bytes in the local root signature area of a record (after the shader identifier).
void SetLocalShaderParameters(uint32 ShaderTableOffset, uint32 RecordIndex, uint32 InOffsetWithinRootSignature, const void* InData, uint32 InDataSize)
{
	const uint32 OffsetWithinRecord = ShaderIdentifierSize + InOffsetWithinRootSignature;
	WriteLocalShaderRecord(ShaderTableOffset, RecordIndex, OffsetWithinRecord, InData, InDataSize);
}
// Typed overload: compares Parameters against what is stored in the local root signature area of a record.
template <typename T>
void CompareLocalShaderParameters(uint32 ShaderTableOffset, uint32 RecordIndex, uint32 InOffsetWithinRootSignature, const T& Parameters)
{
	const uint32 OffsetWithinRecord = ShaderIdentifierSize + InOffsetWithinRootSignature;
	CompareLocalShaderRecord(ShaderTableOffset, RecordIndex, OffsetWithinRecord, &Parameters, sizeof(Parameters));
}
// Raw overload: compares InDataSize bytes against the local root signature area of a record.
void CompareLocalShaderParameters(uint32 ShaderTableOffset, uint32 RecordIndex, uint32 InOffsetWithinRootSignature, const void* InData, uint32 InDataSize)
{
	const uint32 OffsetWithinRecord = ShaderIdentifierSize + InOffsetWithinRootSignature;
	CompareLocalShaderRecord(ShaderTableOffset, RecordIndex, OffsetWithinRecord, InData, InDataSize);
}
// Stores Parameters in the local root signature area of a miss shader record.
template <typename T>
void SetMissShaderParameters(uint32 RecordIndex, uint32 InOffsetWithinRootSignature, const T& Parameters)
{
	const uint32 OffsetWithinRecord = ShaderIdentifierSize + InOffsetWithinRootSignature;
	WriteLocalShaderRecord(MissShaderTableOffset, RecordIndex, OffsetWithinRecord, &Parameters, sizeof(Parameters));
}
// Stores Parameters in the local root signature area of a callable shader record.
template <typename T>
void SetCallableShaderParameters(uint32 RecordIndex, uint32 InOffsetWithinRootSignature, const T& Parameters)
{
	const uint32 OffsetWithinRecord = ShaderIdentifierSize + InOffsetWithinRootSignature;
	WriteLocalShaderRecord(CallableShaderTableOffset, RecordIndex, OffsetWithinRecord, &Parameters, sizeof(Parameters));
}
// Copies the local parameter bytes of one record onto another record of the same table.
// The copy starts at InOffsetWithinRootSignature (past the shader identifier) and runs to the end of the record stride.
void CopyLocalShaderParameters(uint32 InShaderTableOffset, uint32 InDestRecordIndex, uint32 InSourceRecordIndex, uint32 InOffsetWithinRootSignature)
{
	// Everything between the requested offset and the end of the record stride is copied.
	const uint32 CopySize = LocalRecordStride - ShaderIdentifierSize - InOffsetWithinRootSignature;
	checkSlow(CopySize <= LocalRecordStride);

	const uint32 ParameterStart = InShaderTableOffset + ShaderIdentifierSize + InOffsetWithinRootSignature;
	const uint32 ToOffset = ParameterStart + LocalRecordStride * InDestRecordIndex;
	const uint32 FromOffset = ParameterStart + LocalRecordStride * InSourceRecordIndex;

	auto* const TableBytes = Data.GetData();
	FMemory::Memcpy(TableBytes + ToOffset, TableBytes + FromOffset, CopySize);
}
// Copies local parameters between two records of the hit group table.
void CopyHitGroupParameters(uint32 InDestRecordIndex, uint32 InSourceRecordIndex, uint32 InOffsetWithinRootSignature)
{
	CopyLocalShaderParameters(HitGroupShaderTableOffset, InDestRecordIndex, InSourceRecordIndex, InOffsetWithinRootSignature);
}
// Writes a shader identifier at the start of miss record RecordIndex.
// In checked builds, writing record 0 also marks the default miss shader as set.
void SetMissIdentifier(uint32 RecordIndex, const FD3D12ShaderIdentifier& ShaderIdentifier)
{
#if DO_CHECK
	if (RecordIndex == 0)
	{
		bWasDefaultMissShaderSet = true;
	}
#endif

	const uint32 RecordStart = MissShaderTableOffset + RecordIndex * LocalRecordStride;
	WriteData(RecordStart, ShaderIdentifier.Data, ShaderIdentifierSize);
}
// Writes a shader identifier at the start of callable record RecordIndex.
void SetCallableIdentifier(uint32 RecordIndex, const FD3D12ShaderIdentifier& ShaderIdentifier)
{
	const uint32 RecordStart = CallableShaderTableOffset + RecordIndex * LocalRecordStride;
	WriteData(RecordStart, ShaderIdentifier.Data, ShaderIdentifierSize);
}
void SetDefaultHitGroupIdentifier(const FD3D12ShaderIdentifier& ShaderIdentifier)
{
const uint32 WriteOffset = HitGroupShaderTableOffset;
WriteData(WriteOffset, ShaderIdentifier.Data, ShaderIdentifierSize);
}
// Stores the system parameters of hit group record RecordIndex.
void SetHitGroupSystemParameters(uint32 RecordIndex, const FD3D12HitGroupSystemParameters& SystemParameters)
{
	// System parameters are always first in the RS.
	constexpr uint32 SystemParametersOffset = 0;
	SetLocalShaderParameters(HitGroupShaderTableOffset, RecordIndex, SystemParametersOffset, SystemParameters);
}
// Compares SystemParameters against the system parameters stored in hit group record RecordIndex.
void CompareHitGroupSystemParameters(uint32 RecordIndex, const FD3D12HitGroupSystemParameters& SystemParameters)
{
	// System parameters are always first in the RS.
	constexpr uint32 SystemParametersOffset = 0;
	CompareLocalShaderParameters(HitGroupShaderTableOffset, RecordIndex, SystemParametersOffset, SystemParameters);
}
// Writes a (valid) shader identifier at the start of hit group record RecordIndex.
void SetHitGroupIdentifier(uint32 RecordIndex, const FD3D12ShaderIdentifier& ShaderIdentifier)
{
	checkfSlow(ShaderIdentifier.IsValid(), TEXT("Shader identifier must be initialized FD3D12RayTracingPipelineState::GetShaderIdentifier() before use."));
	checkSlow(sizeof(ShaderIdentifier.Data) >= ShaderIdentifierSize);

	const uint32 RecordStart = HitGroupShaderTableOffset + RecordIndex * LocalRecordStride;
	WriteData(RecordStart, ShaderIdentifier.Data, ShaderIdentifierSize);
}
// Compares ShaderIdentifier against the identifier stored at the start of hit group record RecordIndex.
void CompareHitGroupIdentifier(uint32 RecordIndex, const FD3D12ShaderIdentifier& ShaderIdentifier)
{
	checkfSlow(ShaderIdentifier.IsValid(), TEXT("Shader identifier must be initialized FD3D12RayTracingPipelineState::GetShaderIdentifier() before use."));
	checkSlow(sizeof(ShaderIdentifier.Data) >= ShaderIdentifierSize);

	const uint32 RecordStart = HitGroupShaderTableOffset + RecordIndex * LocalRecordStride;
	CompareData(RecordStart, ShaderIdentifier.Data, ShaderIdentifierSize);
}
void SetDefaultMissShaderIdentifier(const FD3D12ShaderIdentifier& ShaderIdentifier)
{
// Set all slots to the same default
for (uint32 Index = 0; Index < NumMissRecords; ++Index)
{
SetMissIdentifier(Index, ShaderIdentifier);
}
#if DO_CHECK
bWasDefaultMissShaderSet = false;
#endif
}
void SetDefaultCallableShaderIdentifier(const FD3D12ShaderIdentifier& ShaderIdentifier)
{
for (uint32 Index = 0; Index < NumCallableRecords; ++Index)
{
SetCallableIdentifier(Index, ShaderIdentifier);
}
}
// Stores the system parameters for inline record InlineRecordIndex in the inline geometry parameter storage.
void SetInlineGeometryParameters(uint32 InlineRecordIndex, const FD3D12HitGroupSystemParameters& SystemParameters)
{
	// Inline records are tightly packed, one FD3D12HitGroupSystemParameters per record.
	const uint32 RecordSize = sizeof(FD3D12HitGroupSystemParameters);
	const uint32 RecordStart = RecordSize * InlineRecordIndex;

	auto& FirstRecordByte = InlineGeometryParameterData[RecordStart];
	FMemory::Memcpy(&FirstRecordByte, &SystemParameters, RecordSize);
}
// Raises an ensure when the stored system parameters of inline record InlineRecordIndex differ from SystemParameters.
void CompareInlineGeometryParameters(uint32 InlineRecordIndex, const FD3D12HitGroupSystemParameters& SystemParameters)
{
	const uint32 DataSize = sizeof(FD3D12HitGroupSystemParameters);
	const uint32 RecordStart = DataSize * InlineRecordIndex;

	auto* const InlineBytes = InlineGeometryParameterData.GetData();
	const void* const CurrentData = InlineBytes + RecordStart;
	ensure(FMemory::Memcmp(CurrentData, &SystemParameters, DataSize) == 0);
}
// Returns the total byte size of the inline geometry parameter storage and the size of one record.
FRHISizeAndStride GetInlineBindingDataSizeAndStride() const
{
	FRHISizeAndStride SizeAndStride { static_cast<uint64>(InlineGeometryParameterData.Num()), sizeof(FD3D12HitGroupSystemParameters) };
	return SizeAndStride;
}
// Uploads the CPU-side binding data to the GPU and clears the dirty flag.
// - Merges per-worker tracking data (referenced resources, max used hit record index) into worker 0
//   and clears every worker's shader record cache.
// - RTPSO mode: creates a new GPU buffer for the shader table, uploads Data through the copy queue and
//   enqueues a transition to NON_PIXEL_SHADER_RESOURCE.
// - Inline mode: copies the used part of the inline geometry parameters into InInlineBindingDataBuffer.
// Must run on the RHI thread (or any thread when the RHI does not run on a separate thread), and only
// while the table is dirty.
void Commit(FD3D12CommandContext& Context, FRHIBuffer* InInlineBindingDataBuffer)
{
	TRACE_CPUPROFILER_EVENT_SCOPE(ShaderTableCommit);
	check(IsInRHIThread() || !IsRunningRHIInSeparateThread());
	checkf(bIsDirty, TEXT("bIsDirty should be checked before calling Commit()"));

#if ENABLE_RESIDENCY_MANAGEMENT
	// Merge all data from worker threads into the main set
	{
		TRACE_CPUPROFILER_EVENT_SCOPE(MergeWorkerData);
		for (uint32 WorkerIndex = 1; WorkerIndex < MaxBindingWorkers; ++WorkerIndex)
		{
			for (FD3D12Resource* Resource : WorkerData[WorkerIndex].DynamicReferencedResources)
			{
				AddDynamicReferencedResource(0, Resource);
			}
			for (auto Iter = WorkerData[WorkerIndex].PersistentReferencedResources.CreateIterator(); Iter; ++Iter)
			{
				UpdatePersistentReferencedResource(0, Iter.Key(), Iter.Value());
			}
			WorkerData[WorkerIndex].DynamicReferencedResources.Empty();
			WorkerData[WorkerIndex].DynamicReferencedResourceSet.Empty();
			WorkerData[WorkerIndex].PersistentReferencedResources.Empty();
			WorkerData[WorkerIndex].LastTrackedBaseShaderResources.Empty();
		}
	}

	// Build the used persistent d3d12 resource set from the views
	{
		TRACE_CPUPROFILER_EVENT_SCOPE(BuildUsedResourceSet);
		ReferencedResources.Empty(WorkerData[0].PersistentReferencedResources.Num() + WorkerData[0].DynamicReferencedResources.Num());

		// Add tracked resources for persistent SBT records
		for (auto Iter = WorkerData[0].PersistentReferencedResources.CreateIterator(); Iter; ++Iter)
		{
			ReferencedResources.Add(Iter.Key());
		}

		// Also add all the collected dynamic resources and reset for the next 'frame'
		ReferencedResources.Append(WorkerData[0].DynamicReferencedResources);
		WorkerData[0].DynamicReferencedResources.Empty();
		WorkerData[0].DynamicReferencedResourceSet.Empty();
		WorkerData[0].LastTrackedBaseShaderResources.Empty();
	}
#endif // ENABLE_RESIDENCY_MANAGEMENT

	// Merge max record index from worker data into index 0
	for (uint32 WorkerIndex = 1; WorkerIndex < MaxBindingWorkers; ++WorkerIndex)
	{
		WorkerData[0].MaxUsedHitRecordIndex = FMath::Max(WorkerData[0].MaxUsedHitRecordIndex, WorkerData[WorkerIndex].MaxUsedHitRecordIndex);
		WorkerData[WorkerIndex].MaxUsedHitRecordIndex = 0;
	}

	// Clear the record cache for reuse for all worker threads (including index 0)
	for (uint32 WorkerIndex = 0; WorkerIndex < MaxBindingWorkers; ++WorkerIndex)
	{
		WorkerData[WorkerIndex].ShaderRecordCache.Empty();
	}

	FD3D12Adapter* Adapter = Device->GetParentAdapter();

	if (EnumHasAnyFlags(ShaderBindingMode, ERayTracingShaderBindingMode::RTPSO))
	{
		checkf(Data.Num(), TEXT("Shader table is expected to be initialized before copying to GPU."));
#if DO_CHECK
		// Need to split out hit binding data from miss and callable because it's possible that hit binding data wasn't set or needed (no valid binding),
		// but miss data is still needed, and we also don't need to copy the complete allocated hit binding data but only upto MaxUsedHitRecordIndex
		// which is not easily possible with the 1 big allocation.
		//checkf(bWasDefaultMissShaderSet, TEXT("At least the first miss shader must have been set before copying to GPU."));
#endif

		const uint32 BufferSize = Data.GetResourceDataSize();
		D3D12_RESOURCE_DESC BufferDesc = CD3DX12_RESOURCE_DESC::Buffer(BufferSize, D3D12_RESOURCE_FLAG_NONE, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT);

		const FRHIBufferCreateDesc CreateDesc =
			FRHIBufferCreateDesc::Create(TEXT("ShaderBindingTable"), BufferSize, 0, BUF_Static)
			.SetInitialState(ERHIAccess::CopyDest)
			.SetGPUMask(FRHIGPUMask::FromIndex(Device->GetGPUIndex()));

		// The buffer is created in COPY_DEST state because its contents are uploaded right below.
		Buffer = Adapter->CreateRHIBuffer(
			BufferDesc,
			BufferDesc.Alignment,
			CreateDesc,
			ED3D12ResourceStateMode::MultiState,
			D3D12_RESOURCE_STATE_COPY_DEST,
			/*bHasInitialData*/ true
		);

		// Use copy queue for uploading the data
		Context.BatchedSyncPoints.ToWait.Emplace(Buffer->UploadResourceDataViaCopyQueue(Context, &Data));

		// Enqueue transition to SRV
		Context.TransitionResource(
			Buffer->GetResource(),
			D3D12_RESOURCE_STATE_COPY_DEST,
			D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE,
			0);
	}

	// Also copy geometry parameter data to the GPU buffer
	if (EnumHasAnyFlags(ShaderBindingMode, ERayTracingShaderBindingMode::Inline) && WorkerData[0].MaxUsedHitRecordIndex >= 0)
	{
		check(InInlineBindingDataBuffer);
		FD3D12Buffer* D3D12InlineBindingDataBuffer = FD3D12DynamicRHI::ResourceCast(InInlineBindingDataBuffer, Device->GetGPUIndex());

		// Only the records up to (and including) the highest used inline record are uploaded.
		uint32 MaxUsedInlineRecordIndex = WorkerData[0].MaxUsedHitRecordIndex / NumShaderSlotsPerGeometrySegment;
		uint32 MaxUsedInlineGeometryParameterDataSize = (MaxUsedInlineRecordIndex + 1) * sizeof(FD3D12HitGroupSystemParameters);

		// Stage the data in upload heap memory, then copy it into the destination buffer on the GPU timeline.
		FD3D12ResourceLocation UploadResourceLocation(Device);
		void* MappedUploadMemory = Adapter->GetUploadHeapAllocator(Device->GetGPUIndex()).AllocUploadResource(MaxUsedInlineGeometryParameterDataSize, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT, UploadResourceLocation);
		FMemory::Memcpy(MappedUploadMemory, InlineGeometryParameterData.GetData(), MaxUsedInlineGeometryParameterDataSize);

		FD3D12Resource* SourceResource = UploadResourceLocation.GetResource();
		FD3D12Resource* DestResource = D3D12InlineBindingDataBuffer->ResourceLocation.GetResource();
		Context.UpdateResidency(DestResource);
		Context.CopyBufferRegionChecked(
			DestResource->GetResource(), DestResource->GetName(),
			D3D12InlineBindingDataBuffer->ResourceLocation.GetOffsetFromBaseOfResource(),
			SourceResource->GetResource(), SourceResource->GetName(),
			UploadResourceLocation.GetOffsetFromBaseOfResource(),
			MaxUsedInlineGeometryParameterDataSize
		);

#if ENABLE_RESIDENCY_MANAGEMENT
		// assume no references resources when inline SBT only - inline SBT only has references to IB/VB buffers and these are
		// residency tracked via the TLAS
		check(ShaderBindingMode != ERayTracingShaderBindingMode::Inline || ReferencedResources.IsEmpty());
#endif // ENABLE_RESIDENCY_MANAGEMENT
	}

	bIsDirty = false;
}
/**
 * Returns the GPU virtual address of the shader table buffer.
 * Asserts while the table is still marked dirty, since the GPU address is only meaningful once
 * the pending update has been uploaded.
 */
D3D12_GPU_VIRTUAL_ADDRESS GetShaderTableAddress() const
{
	checkf(!bIsDirty, TEXT("Shader table update is pending, therefore GPU address is not available. Use Commit() to upload data and acquire a valid GPU buffer address."));

	const D3D12_GPU_VIRTUAL_ADDRESS TableAddress = Buffer->ResourceLocation.GetGPUVirtualAddress();
	return TableAddress;
}
/**
 * Builds the D3D12_DISPATCH_RAYS_DESC for a dispatch that uses this shader table.
 *
 * The ray generation record does not live in the shader table buffer: the supplied identifier is copied
 * into memory obtained from the device's default fast allocator. The miss, callable and hit group
 * ranges are expressed relative to the shader table GPU address.
 *
 * @param InDevice                Must be the device this shader table belongs to (asserted).
 * @param RayGenShaderIdentifier  Identifier of the ray generation shader to dispatch.
 * @return Descriptor with the ray gen / miss / callable / hit group ranges filled in. All other fields
 *         are left zero-initialized.
 */
D3D12_DISPATCH_RAYS_DESC GetDispatchRaysDesc(FD3D12Device* InDevice, const FD3D12ShaderIdentifier& RayGenShaderIdentifier) const
{
	check(Device == InDevice);
	check(ShaderIdentifierSize == sizeof(FD3D12ShaderIdentifier));

	// Stage the ray gen shader identifier in its own allocation
	FD3D12ResourceLocation RayGenRecordLocation(Device);
	void* RayGenRecordMemory = Device->GetDefaultFastAllocator().Allocate(RayGenRecordStride, 256, &RayGenRecordLocation);
	FMemory::Memcpy(RayGenRecordMemory, &RayGenShaderIdentifier, ShaderIdentifierSize);
	const D3D12_GPU_VIRTUAL_ADDRESS RayGenRecordAddress = RayGenRecordLocation.GetGPUVirtualAddress();

	// Asserts if the table has not been committed yet
	const D3D12_GPU_VIRTUAL_ADDRESS TableBaseAddress = GetShaderTableAddress();

	D3D12_DISPATCH_RAYS_DESC Desc = {};

	Desc.RayGenerationShaderRecord.StartAddress = RayGenRecordAddress;
	Desc.RayGenerationShaderRecord.SizeInBytes = RayGenRecordStride;

	Desc.MissShaderTable.StartAddress = TableBaseAddress + MissShaderTableOffset;
	Desc.MissShaderTable.StrideInBytes = LocalRecordStride;
	Desc.MissShaderTable.SizeInBytes = LocalRecordStride * NumMissRecords;

	// The callable range stays zeroed when there are no callable records
	if (NumCallableRecords)
	{
		Desc.CallableShaderTable.StartAddress = TableBaseAddress + CallableShaderTableOffset;
		Desc.CallableShaderTable.StrideInBytes = LocalRecordStride;
		Desc.CallableShaderTable.SizeInBytes = NumCallableRecords * LocalRecordStride;
	}

	// The hit group table starts at the same address in both indexing modes; only stride and size differ
	Desc.HitGroupTable.StartAddress = TableBaseAddress + HitGroupShaderTableOffset;
	if (HitGroupIndexingMode == ERayTracingHitGroupIndexingMode::Allow)
	{
		Desc.HitGroupTable.StrideInBytes = LocalRecordStride;
		Desc.HitGroupTable.SizeInBytes = NumHitRecords * LocalRecordStride;
	}
	else
	{
		Desc.HitGroupTable.StrideInBytes = 0; // Zero stride effectively disables SBT indexing
		Desc.HitGroupTable.SizeInBytes = D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT; // Minimal table with only one record
	}

	return Desc;
}
// Size in bytes of a single D3D12 shader identifier.
static constexpr uint32 ShaderIdentifierSize = D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES;
// Number of records in each section of the table.
uint32 NumHitRecords = 0;
uint32 NumCallableRecords = 0;
uint32 NumMissRecords = 0;
// Byte offsets of each section; GetDispatchRaysDesc() adds them to the shader table GPU address.
uint32 MissShaderTableOffset = 0;
uint32 HitGroupShaderTableOffset = 0;
uint32 CallableShaderTableOffset = 0;
// Device this shader table belongs to.
FD3D12Device* Device = nullptr;
ERayTracingShaderBindingTableLifetime Lifetime = ERayTracingShaderBindingTableLifetime::Transient;
ERayTracingHitGroupIndexingMode HitGroupIndexingMode = ERayTracingHitGroupIndexingMode::Allow;
ERayTracingShaderBindingMode ShaderBindingMode = ERayTracingShaderBindingMode::RTPSO;
// Used as a divisor when mapping hit record indices to inline record indices (see GetInlineRecordIndex), so it must be non-zero by then.
uint32 NumShaderSlotsPerGeometrySegment = 0;
// Note: TABLE_BYTE_ALIGNMENT is used instead of RECORD_BYTE_ALIGNMENT to allow arbitrary switching
// between multiple RayGen and Miss shaders within the same underlying table.
static constexpr uint32 RayGenRecordStride = D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT;
uint32 LocalRecordSizeUnaligned = 0; // size of the shader identifier + local root parameters, not aligned to SHADER_RECORD_BYTE_ALIGNMENT (used for out-of-bounds access checks)
uint32 LocalRecordStride = 0; // size of shader identifier + local root parameters, aligned to SHADER_RECORD_BYTE_ALIGNMENT (same for hit groups and callable shaders)
// CPU-side shader table contents that get uploaded into Buffer.
TResourceArray<uint8, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT> Data;
// CPU-side data that contains per-hit-record index and vertex buffer binding data (used by the inline binding mode)
TResourceArray<uint8, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT> InlineGeometryParameterData;
// True while an upload of the table is pending; GetShaderTableAddress() asserts while this is set.
bool bIsDirty = true;
// GPU buffer holding the shader table.
TRefCountPtr<FD3D12Buffer> Buffer;
#if DO_CHECK
bool bWasDefaultMissShaderSet = false;
#endif
// All referenced D3D12 resources from both persistent and dynamic records (rebuilt during commit)
TArray<FD3D12Resource*> ReferencedResources;
// SBTs have their own descriptor heaps
FD3D12ExplicitDescriptorCache* DescriptorCache = nullptr;
struct FRecordUpdateUniformBufferListener : public ID3D12UniformBufferUpdateListener
{
FD3D12RayTracingShaderBindingTableInternal& ShaderTable;
FD3D12UniformBuffer* UniformBuffer = nullptr;
uint32 ShaderTableOffset = 0;
uint32 RecordIndex = 0;
uint32 OffsetWithinRootSignature = 0;
FRecordUpdateUniformBufferListener(FD3D12RayTracingShaderBindingTableInternal& InShaderTable) : ShaderTable(InShaderTable)
{
}
virtual ~FRecordUpdateUniformBufferListener() = default;
void Init(FD3D12UniformBuffer* InUniformBuffer, uint32 InShaderTableOffset, uint32 InRecordIndex, uint32 InOffsetWithinRootSignature)
{
UniformBuffer = InUniformBuffer;
ShaderTableOffset = InShaderTableOffset;
RecordIndex = InRecordIndex;
OffsetWithinRootSignature = InOffsetWithinRootSignature;
UniformBuffer->AddUpdateListener(this);
}
void Reset()
{
check(UniformBuffer);
UniformBuffer->RemoveUpdateListener(this);
UniformBuffer = nullptr;
}
virtual void RemoveListener(FD3D12UniformBuffer* InUpdatedUniformBuffer) final override
{
check(UniformBuffer == InUpdatedUniformBuffer);
ShaderTable.RemoveUpdateUniformBufferListener(RecordIndex, this);
}
virtual void UniformBufferUpdated(FRHICommandListBase& CmdList, FD3D12UniformBuffer* InUpdatedUniformBuffer) final override
{
check(UniformBuffer == InUpdatedUniformBuffer);
ShaderTable.SetLocalShaderParameters(ShaderTableOffset, RecordIndex, OffsetWithinRootSignature, InUpdatedUniformBuffer->ResourceLocation.GetGPUVirtualAddress());
INC_DWORD_STAT(STAT_D3D12RayTracingUBRecordUpdate);
}
};
struct FRecordUpdateRayTracingGeometryListener : public ID3D12RayTracingGeometryUpdateListener
{
FD3D12RayTracingShaderBindingTableInternal* ShaderTable = nullptr;
const FD3D12RayTracingGeometry* Geometry = nullptr;
uint32 RecordIndex = 0;
uint32 UserData = 0;
uint32 SegmentIndex = 0;
virtual ~FRecordUpdateRayTracingGeometryListener() = default;
void Init(FD3D12RayTracingShaderBindingTableInternal* InShaderTable, const FD3D12RayTracingGeometry* InGeometry, uint32 InRecordIndex, uint32 InUserData, uint32 InSegmentIndex)
{
check(Geometry == nullptr);
ShaderTable = InShaderTable;
Geometry = InGeometry;
RecordIndex = InRecordIndex;
UserData = InUserData;
SegmentIndex = InSegmentIndex;
Geometry->AddUpdateListener(this);
}
void Reset()
{
ShaderTable = nullptr;
Geometry = nullptr;
RecordIndex = 0;
UserData = 0;
SegmentIndex = 0;
}
virtual void RemoveListener(FD3D12RayTracingGeometry* InGeometry) final override
{
check(Geometry == InGeometry);
ShaderTable->RemoveUpdateRayTracingGeometryListener(RecordIndex, this);
}
virtual void HitGroupParametersUpdated(FD3D12RayTracingGeometry* InGeometry) final override
{
check(InGeometry == Geometry);
uint32 GPUIndex = ShaderTable->Device->GetGPUIndex();
FD3D12HitGroupSystemParameters SystemParameters = Geometry->HitGroupSystemParameters[GPUIndex][SegmentIndex];
SystemParameters.RootConstants.UserData = UserData;
uint32 InlineRecordIndex = ShaderTable->GetInlineRecordIndex(RecordIndex);
if (EnumHasAnyFlags(ShaderTable->ShaderBindingMode, ERayTracingShaderBindingMode::RTPSO))
{
ShaderTable->SetHitGroupSystemParameters(RecordIndex, SystemParameters);
}
if (EnumHasAnyFlags(ShaderTable->ShaderBindingMode, ERayTracingShaderBindingMode::Inline) && InlineRecordIndex != INDEX_NONE)
{
ShaderTable->SetInlineGeometryParameters(InlineRecordIndex, SystemParameters);
}
}
};
#if ENABLE_RESIDENCY_MANAGEMENT
/**
 * Listener attached to a base shader resource that is referenced by a persistent record.
 * Keeps the shader table's persistent resource reference counts in sync when the underlying
 * D3D12 resource of the shader resource changes.
 */
struct FRecordUpdateShaderResourceRenameListener : public FD3D12ShaderResourceRenameListener
{
	FD3D12RayTracingShaderBindingTableInternal& ShaderTable;
	uint32 RecordIndex = 0;
	TRefCountPtr<FD3D12BaseShaderResource> ShaderResource;
	FD3D12Resource* CurrentResource = nullptr;

	FRecordUpdateShaderResourceRenameListener(FD3D12RayTracingShaderBindingTableInternal& InShaderTable)
		: ShaderTable(InShaderTable)
	{
	}

	virtual ~FRecordUpdateShaderResourceRenameListener() = default;

	/** Starts tracking InShaderResource and adds a reference to its current D3D12 resource (if it has one). */
	void Init(uint32 WorkerIndex, uint32 InRecordIndex, FD3D12BaseShaderResource* InShaderResource)
	{
		RecordIndex = InRecordIndex;
		ShaderResource = InShaderResource;
		CurrentResource = InShaderResource->ResourceLocation.GetResource();

		ShaderResource->AddRenameListener(this);

		// The backing resource may be empty, in which case there is nothing to reference yet
		if (CurrentResource != nullptr)
		{
			ShaderTable.UpdatePersistentReferencedResource(WorkerIndex, CurrentResource, 1);
		}
	}

	/** Stops tracking: drops the reference on the current resource, unregisters and clears all members. */
	void Reset(uint32 WorkerIndex)
	{
		check(ShaderResource);

		if (CurrentResource != nullptr)
		{
			ShaderTable.UpdatePersistentReferencedResource(WorkerIndex, CurrentResource, -1);
			// Hand the shader resource to the shader table's last-tracked set before letting go of it here
			ShaderTable.AddLastTrackedBaseShaderResource(WorkerIndex, ShaderResource);
		}

		ShaderResource->RemoveRenameListener(this);

		ShaderResource = nullptr;
		CurrentResource = nullptr;
		RecordIndex = 0;
	}

	/** Moves the persistent reference from the old to the new D3D12 resource when the shader resource is renamed. */
	virtual void ResourceRenamed(FD3D12ContextArray const& Contexts, FD3D12BaseShaderResource* InRenamedResource, FD3D12ResourceLocation* InNewResourceLocation) override
	{
		check(InRenamedResource == ShaderResource);

		FD3D12Resource* NewResource = InNewResourceLocation->GetResource();

		// only care if actual d3d12 resource has changed
		if (NewResource == CurrentResource)
		{
			return;
		}

		// Resource could be streamed in or out and then contain empty d3d12 backing resource
		if (CurrentResource != nullptr)
		{
			ShaderTable.UpdatePersistentReferencedResource(0, CurrentResource, -1);
		}
		if (NewResource != nullptr)
		{
			ShaderTable.UpdatePersistentReferencedResource(0, NewResource, 1);
		}

		CurrentResource = NewResource;
	}
};
struct FRecordUpdateTextureReferenceReplaceListener : public FD3D12TextureReferenceReplaceListener, public FD3D12ShaderResourceRenameListener
{
FD3D12RayTracingShaderBindingTableInternal& ShaderTable;
uint32 RecordIndex = 0;
TRefCountPtr<FD3D12RHITextureReference> TextureReference;
FD3D12Resource* CurrentResource = nullptr;
FRecordUpdateTextureReferenceReplaceListener(FD3D12RayTracingShaderBindingTableInternal& InShaderTable) : ShaderTable(InShaderTable)
{
}
virtual ~FRecordUpdateTextureReferenceReplaceListener() = default;
void Init(uint32 WorkerIndex, uint32 InRecordIndex, FD3D12RHITextureReference* InTextureReference)
{
RecordIndex = InRecordIndex;
TextureReference = InTextureReference;
TextureReference->AddReplaceListener(this);
// Handle rename events on the references texture as well
FD3D12Texture* ReferencedTexture = FD3D12DynamicRHI::ResourceCast(TextureReference->GetReferencedTexture());
ReferencedTexture->AddRenameListener(this);
CurrentResource = ReferencedTexture->ResourceLocation.GetResource();
check(CurrentResource);
ShaderTable.UpdatePersistentReferencedResource(WorkerIndex, CurrentResource, 1);
}
void Reset(uint32 WorkerIndex)
{
check(TextureReference && CurrentResource);
ShaderTable.UpdatePersistentReferencedResource(WorkerIndex, CurrentResource, -1);
FD3D12Texture* ReferencedTexture = FD3D12DynamicRHI::ResourceCast(TextureReference->GetReferencedTexture());
ReferencedTexture->RemoveRenameListener(this);
TextureReference->RemoveReplaceListener(this);
ShaderTable.AddLastTrackedBaseShaderResource(WorkerIndex, ReferencedTexture);
TextureReference = nullptr;
RecordIndex = 0;
CurrentResource = 0;
}
virtual void TextureReplaced(FD3D12ContextArray const& Contexts, FD3D12RHITextureReference* InTextureReference, FD3D12Texture* CurrentTexture, FD3D12Texture* NewTexture) override
{
check(TextureReference == InTextureReference);
FD3D12Resource* NewResource = NewTexture->ResourceLocation.GetResource();
// only care if actual d3d12 resource has changed
if (NewResource != CurrentResource)
{
ShaderTable.UpdatePersistentReferencedResource(0, CurrentResource, -1);
ShaderTable.UpdatePersistentReferencedResource(0, NewResource, 1);
CurrentResource = NewResource;
}
// Also update the rename listener
CurrentTexture->RemoveRenameListener(this);
NewTexture->AddRenameListener(this);
}
virtual void ResourceRenamed(FD3D12ContextArray const& Contexts, FD3D12BaseShaderResource* InRenamedResource, FD3D12ResourceLocation* InNewResourceLocation) override
{
FD3D12Texture* ReferencedTexture = FD3D12DynamicRHI::ResourceCast(TextureReference->GetReferencedTexture());
check(InRenamedResource == ReferencedTexture);
FD3D12Resource* NewResource = InNewResourceLocation->GetResource();
// only care if actual d3d12 resource has changed
if (NewResource != CurrentResource)
{
ShaderTable.UpdatePersistentReferencedResource(0, CurrentResource, -1);
ShaderTable.UpdatePersistentReferencedResource(0, NewResource, 1);
CurrentResource = NewResource;
}
}
};
#endif // ENABLE_RESIDENCY_MANAGEMENT
// Per-record tracking data, kept so that the GPU data stored for a record can be updated afterwards
// (e.g. after a defrag operation)
struct FRecordData
{
	enum class EState
	{
		Uninitialized,
		Persistent,
		Transient,
	};

	/**
	 * Returns the record to the uninitialized state.
	 * The listener arrays hold raw pointers and are only emptied here - the listeners are not deleted.
	 * LooseParameterResourceLocation is not touched either.
	 */
	void Reset()
	{
		State = EState::Uninitialized;

		UpdateGeometryListener.Reset();
		UpdateUniformBufferListeners.Empty();

#if ENABLE_RESIDENCY_MANAGEMENT
		ReferencedTextureReferenceListeners.Empty();
		ReferencedShaderResourceListeners.Empty();
#endif // ENABLE_RESIDENCY_MANAGEMENT

#if DO_CHECK
		LooseParameterData.Empty();
#endif //DO_CHECK
	}

	EState State = EState::Uninitialized;

	// One listener per uniform buffer bound persistently to this record
	TArray<FRecordUpdateUniformBufferListener*> UpdateUniformBufferListeners;

	// Listener on the ray tracing geometry bound persistently to this record (unbound while its Geometry is null)
	FRecordUpdateRayTracingGeometryListener UpdateGeometryListener;

	// Upload allocation holding the loose parameter data of a persistent record (null when there is none)
	FD3D12ResourceLocation* LooseParameterResourceLocation = nullptr;

#if ENABLE_RESIDENCY_MANAGEMENT
	TArray<FRecordUpdateShaderResourceRenameListener*> ReferencedShaderResourceListeners;
	TArray<FRecordUpdateTextureReferenceReplaceListener*> ReferencedTextureReferenceListeners;
#endif // ENABLE_RESIDENCY_MANAGEMENT

#if DO_CHECK
	// CPU copy of the loose parameter data, only used to validate later bindings against the stored data
	TArray<uint8> LooseParameterData;
#endif //DO_CHECK
};
// Per hit record tracking data, indexed by hit record index.
// NOTE(review): the visible code only indexes this for non-transient shader tables - presumably it is only sized for those; confirm.
TArray<FRecordData> HitRecordData;
/**
 * Maps a hit record index to the index of its inline geometry parameter entry.
 * Only records for shader slot 0 of a geometry segment have an inline entry.
 *
 * @return The inline record index, or INDEX_NONE (converted to uint32) for records of any other shader slot.
 */
uint32 GetInlineRecordIndex(uint32 RecordIndex) const
{
	// Only care about shader slot 0 for inline geometry parameters -> remap the record index
	const uint32 ShaderSlot = RecordIndex % NumShaderSlotsPerGeometrySegment;
	if (ShaderSlot != 0)
	{
		return static_cast<uint32>(INDEX_NONE);
	}

	return RecordIndex / NumShaderSlotsPerGeometrySegment;
}
/**
 * Writes (or validates) the hit group system parameters of a geometry segment for a hit record.
 *
 * Transient shader tables always write. For other lifetimes the data is written unless this is a
 * validation binding, or a transient binding on top of a record that is already persistent; in those
 * cases the stored data is only compared against the expected values.
 * Persistent bindings additionally register a geometry listener for the record.
 *
 * @param WorkerIndex           Worker slot whose MaxUsedHitRecordIndex is updated.
 * @param BindingType           Kind of binding being applied to the record.
 * @param RecordIndex           Hit record to update.
 * @param Geometry              Geometry providing the per-segment system parameters.
 * @param UserData              Value patched into the root constants of the record.
 * @param GeometrySegmentIndex  Segment of the geometry the record belongs to.
 */
void SetHitGroupGeometrySystemParameters(uint32 WorkerIndex, ERayTracingLocalShaderBindingType BindingType, uint32 RecordIndex, const FD3D12RayTracingGeometry* Geometry, uint32 UserData, uint32 GeometrySegmentIndex)
{
	// Take a copy so the per-record user data can be patched in
	FD3D12HitGroupSystemParameters SegmentParameters = Geometry->HitGroupSystemParameters[Device->GetGPUIndex()][GeometrySegmentIndex];
	SegmentParameters.RootConstants.UserData = UserData;

	int32& MaxUsedIndex = WorkerData[WorkerIndex].MaxUsedHitRecordIndex;
	MaxUsedIndex = FMath::Max(MaxUsedIndex, (int32)RecordIndex);

	const uint32 InlineRecordIndex = GetInlineRecordIndex(RecordIndex);
	const bool bTouchRTPSO = EnumHasAnyFlags(ShaderBindingMode, ERayTracingShaderBindingMode::RTPSO);
	const bool bTouchInline = EnumHasAnyFlags(ShaderBindingMode, ERayTracingShaderBindingMode::Inline) && InlineRecordIndex != INDEX_NONE;

	// Stores the parameters into every representation enabled by the binding mode
	auto WriteParameters = [&]()
	{
		if (bTouchRTPSO)
		{
			SetHitGroupSystemParameters(RecordIndex, SegmentParameters);
		}
		if (bTouchInline)
		{
			SetInlineGeometryParameters(InlineRecordIndex, SegmentParameters);
		}
	};

	// Compares the already stored parameters against the expected ones
	auto CompareParameters = [&]()
	{
		if (bTouchRTPSO)
		{
			CompareHitGroupSystemParameters(RecordIndex, SegmentParameters);
		}
		if (bTouchInline)
		{
			CompareInlineGeometryParameters(InlineRecordIndex, SegmentParameters);
		}
	};

	if (Lifetime == ERayTracingShaderBindingTableLifetime::Transient)
	{
		WriteParameters();
		return;
	}

	FRecordData& RecordData = HitRecordData[RecordIndex];

	// Special use case when trying to set transient binding on already persistent stored binding (validate content only then)
	// Can happen we go from persistent -> transient -> persistent when RTPSO is still compiling for example
	const bool bTransientOnPersistent = RecordData.State == FRecordData::EState::Persistent && BindingType == ERayTracingLocalShaderBindingType::Transient;
	const bool bValidateOnly = BindingType == ERayTracingLocalShaderBindingType::Validation || bTransientOnPersistent;

	if (bValidateOnly)
	{
		check(RecordData.State == FRecordData::EState::Persistent);
		CompareParameters();
		return;
	}

	check(RecordData.State != FRecordData::EState::Persistent || BindingType == ERayTracingLocalShaderBindingType::Transient);
	WriteParameters();

	if (BindingType == ERayTracingLocalShaderBindingType::Persistent)
	{
		// If persistently stored then also listen to hit record updates for stream in/out events of the ray tracing geometry data
		RecordData.UpdateGeometryListener.Init(this, Geometry, RecordIndex, UserData, GeometrySegmentIndex);
		RecordData.State = FRecordData::EState::Persistent;
	}
	else
	{
		RecordData.State = FRecordData::EState::Transient;
	}
}
#if ENABLE_RESIDENCY_MANAGEMENT
/** Adds BaseShaderResource to the given worker's set of last-tracked base shader resources. */
void AddLastTrackedBaseShaderResource(uint32 WorkerIndex, FD3D12BaseShaderResource* BaseShaderResource)
{
	FWorkerThreadData& Worker = WorkerData[WorkerIndex];
	Worker.LastTrackedBaseShaderResources.Add(BaseShaderResource);
}
/**
 * Adjusts the persistent reference count of Resource in the given worker slot by Count.
 * For worker slot 0 the count must not drop below zero, and the entry is removed once it reaches zero.
 * For other worker slots the count is only accumulated.
 */
void UpdatePersistentReferencedResource(uint32 WorkerIndex, FD3D12Resource* Resource, int32 Count)
{
	check(Resource);

	// Update the worker thread reference count of the resource
	TMap<TRefCountPtr<FD3D12Resource>, int32>& TrackedResources = WorkerData[WorkerIndex].PersistentReferencedResources;
	int32& ReferenceCount = TrackedResources.FindOrAdd(Resource);
	ReferenceCount += Count;

	if (WorkerIndex != 0)
	{
		return;
	}

	check(ReferenceCount >= 0);
	if (ReferenceCount == 0)
	{
		TrackedResources.Remove(Resource);
	}
}
/** Records Resource as dynamically referenced by the given worker; a resource is only stored once per worker. */
void AddDynamicReferencedResource(uint32 WorkerIndex, FD3D12Resource* Resource)
{
	FWorkerThreadData& Worker = WorkerData[WorkerIndex];

	// The set is only used for de-duplication, the array holds the actual references
	bool bSeenBefore = false;
	Worker.DynamicReferencedResourceSet.Add(Resource, &bSeenBefore);
	if (bSeenBefore)
	{
		return;
	}

	Worker.DynamicReferencedResources.Add(Resource);
}
#endif // ENABLE_RESIDENCY_MANAGEMENT
/**
 * Tracks a shader resource referenced by a record (only when residency management is enabled).
 * Transient bindings, and any binding on a non-persistent shader table, are tracked as dynamic references.
 * Persistent bindings get a rename listener, at most one per shader resource and record.
 * Other binding types are not tracked.
 */
void AddReferencedShaderResource(uint32 WorkerIndex, uint32 RecordIndex, ERayTracingLocalShaderBindingType BindingType, FD3D12BaseShaderResource* ShaderResource)
{
#if ENABLE_RESIDENCY_MANAGEMENT
	const bool bTrackDynamically = BindingType == ERayTracingLocalShaderBindingType::Transient || Lifetime != ERayTracingShaderBindingTableLifetime::Persistent;
	if (bTrackDynamically)
	{
		AddDynamicReferencedResource(WorkerIndex, ShaderResource->GetResource());
		return;
	}

	if (BindingType != ERayTracingLocalShaderBindingType::Persistent)
	{
		return;
	}

	// Same base shader resource can be bound multiple times to the same record - only track once
	FRecordData& RecordData = HitRecordData[RecordIndex];
	const bool bAlreadyTracked = RecordData.ReferencedShaderResourceListeners.ContainsByPredicate(
		[ShaderResource](FRecordUpdateShaderResourceRenameListener* Existing)
		{
			return Existing->ShaderResource == ShaderResource;
		});
	if (bAlreadyTracked)
	{
		return;
	}

	// Update the tracked record data so it can be updated in case of defrag operation
	FRecordUpdateShaderResourceRenameListener* NewListener = new FRecordUpdateShaderResourceRenameListener(*this);
	NewListener->Init(WorkerIndex, RecordIndex, ShaderResource);
	RecordData.ReferencedShaderResourceListeners.Add(NewListener);
#endif // ENABLE_RESIDENCY_MANAGEMENT
}
/**
 * Tracks a texture referenced by a record (only when residency management is enabled).
 * Transient bindings, and any binding on a non-persistent shader table, are tracked as dynamic references.
 * Persistent bindings of textures that have a texture reference get a replace listener (at most one per
 * texture reference and record); other persistent textures use the base shader resource path.
 */
void AddReferencedTexture(uint32 WorkerIndex, uint32 RecordIndex, ERayTracingLocalShaderBindingType BindingType, FRHITexture* RHITexture)
{
#if ENABLE_RESIDENCY_MANAGEMENT
	const bool bTrackDynamically = BindingType == ERayTracingLocalShaderBindingType::Transient || Lifetime != ERayTracingShaderBindingTableLifetime::Persistent;
	if (bTrackDynamically)
	{
		FD3D12Texture* Texture = FD3D12CommandContext::RetrieveTexture(RHITexture, Device->GetGPUIndex());
		AddDynamicReferencedResource(WorkerIndex, Texture->ResourceLocation.GetResource());
		return;
	}

	if (BindingType != ERayTracingLocalShaderBindingType::Persistent)
	{
		return;
	}

	// Collect all texture references independent of the shader resources because the internal shader resource can be changed on swap
	FRHITextureReference* TextureReferenceRHI = RHITexture->GetTextureReference();
	if (TextureReferenceRHI == nullptr)
	{
		// Fall back to regular base shader resource code path
		FD3D12Texture* Texture = FD3D12CommandContext::RetrieveTexture(RHITexture, Device->GetGPUIndex());
		AddReferencedShaderResource(WorkerIndex, RecordIndex, BindingType, Texture);
		return;
	}

	FD3D12RHITextureReference* TextureReference = FD3D12CommandContext::RetrieveObject<FD3D12RHITextureReference>(TextureReferenceRHI, Device->GetGPUIndex());

	// Same texture reference can be bound multiple times to the same record - only track once
	FRecordData& RecordData = HitRecordData[RecordIndex];
	const bool bAlreadyTracked = RecordData.ReferencedTextureReferenceListeners.ContainsByPredicate(
		[TextureReference](FRecordUpdateTextureReferenceReplaceListener* Existing)
		{
			return Existing->TextureReference == TextureReference;
		});
	if (bAlreadyTracked)
	{
		return;
	}

	// Update the tracked record data so it can be updated in case of texture reference swap
	FRecordUpdateTextureReferenceReplaceListener* NewListener = new FRecordUpdateTextureReferenceReplaceListener(*this);
	NewListener->Init(WorkerIndex, RecordIndex, TextureReference);
	RecordData.ReferencedTextureReferenceListeners.Add(NewListener);
#endif // ENABLE_RESIDENCY_MANAGEMENT
}
/**
 * Registers an update listener for a uniform buffer bound to a record.
 * Only done for persistent bindings on persistent shader tables; a uniform buffer must not be
 * registered twice for the same record (asserted).
 */
void AddReferencedUniformBuffer(uint32 WorkerIndex, uint32 RecordIndex, uint32 ShaderTableOffset, uint32 InOffsetWithinRootSignature, ERayTracingLocalShaderBindingType BindingType, FD3D12UniformBuffer* UniformBuffer)
{
	// Uniform buffers shouldn't have any residency handles because they are all allocated in upload memory, so no extra tracking is required
	check(UniformBuffer->ResourceLocation.GetResource()->GetResidencyHandles().IsEmpty());

	const bool bPersistentBinding =
		Lifetime == ERayTracingShaderBindingTableLifetime::Persistent &&
		BindingType == ERayTracingLocalShaderBindingType::Persistent;
	if (!bPersistentBinding)
	{
		return;
	}

	FRecordData& RecordData = HitRecordData[RecordIndex];
	for (FRecordUpdateUniformBufferListener* Listener : RecordData.UpdateUniformBufferListeners)
	{
		check(Listener->UniformBuffer != UniformBuffer);
	}

	FRecordUpdateUniformBufferListener* NewListener = new FRecordUpdateUniformBufferListener(*this);
	NewListener->Init(UniformBuffer, ShaderTableOffset, RecordIndex, InOffsetWithinRootSignature);
	RecordData.UpdateUniformBufferListeners.Add(NewListener);
}
/**
 * Uploads (or validates) the loose parameter data of a record and reports its GPU address.
 *
 * Validation bindings do not allocate: the data is compared against the stored copy (check builds only)
 * and the address of the already stored allocation is returned. All other bindings copy the data into a
 * fresh upload heap allocation. On persistent shader tables the allocation is handed over to the record
 * when the binding is persistent, or replaces the record's existing allocation when a previously
 * persistent record is temporarily bound transiently.
 *
 * @param WorkerIndex             Worker slot that collects created constant buffer views.
 * @param RecordIndex             Hit record the data belongs to.
 * @param BindingType             Kind of binding being applied.
 * @param LooseParameterData      Source data to upload / validate.
 * @param LooseParameterDataSize  Size of the source data in bytes.
 * @param OutGPUVirtualAddress    Receives the GPU address of the loose parameter data. For validation
 *                                bindings it is only written when the record has a stored allocation.
 * @return The created constant buffer view when D3D12RHI_USE_CONSTANT_BUFFER_VIEWS is enabled and data was
 *         uploaded, otherwise nullptr.
 */
FD3D12ConstantBufferView* SetLooseParameterData(
	uint32 WorkerIndex,
	uint32 RecordIndex,
	ERayTracingLocalShaderBindingType BindingType,
	const void* LooseParameterData,
	uint32 LooseParameterDataSize,
	D3D12_GPU_VIRTUAL_ADDRESS& OutGPUVirtualAddress)
{
	// If we see a significant number of LooseParameter allocations coming through this path, we should consider
	// caching constant buffer blocks inside ShaderTable and linearly sub-allocate from them.
	// If the amount of data is relatively small, it may also be possible to use root constants and avoid extra allocations entirely.
	FD3D12ConstantBufferView* ConstantBufferView = nullptr;
	if (BindingType == ERayTracingLocalShaderBindingType::Validation)
	{
#if DO_CHECK
		{
			FRecordData& RecordData = HitRecordData[RecordIndex];
			check(RecordData.LooseParameterData.Num() == LooseParameterDataSize);
			check(FMemory::Memcmp(RecordData.LooseParameterData.GetData(), LooseParameterData, LooseParameterDataSize) == 0);
			check(RecordData.LooseParameterResourceLocation);
		}
#endif // DO_CHECK

		// Report the stored address in every build configuration. This used to live inside the DO_CHECK
		// block, which left the out parameter unwritten when checks are compiled out.
		// The lookups are guarded because the checks above do not exist in those builds.
		if (HitRecordData.IsValidIndex(static_cast<int32>(RecordIndex)))
		{
			if (FD3D12ResourceLocation* StoredLocation = HitRecordData[RecordIndex].LooseParameterResourceLocation)
			{
				OutGPUVirtualAddress = StoredLocation->GetGPUVirtualAddress();
			}
		}
	}
	else
	{
		// Always allocate using the upload heap allocator and not the TransientUniformBufferAllocator (can cause GPU crashes with transient data somehow)
		FD3D12ResourceLocation ResourceLocation(Device);
		FD3D12UploadHeapAllocator& Allocator = Device->GetParentAdapter()->GetUploadHeapAllocator(Device->GetGPUIndex());
		void* MappedData = Allocator.AllocUploadResource(LooseParameterDataSize, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, ResourceLocation);
		FMemory::Memcpy(MappedData, LooseParameterData, LooseParameterDataSize);
		OutGPUVirtualAddress = ResourceLocation.GetGPUVirtualAddress();

		// Loose parameter allocations shouldn't have any residency handles because they are allocated in upload memory, so no residency tracking is required
		check(ResourceLocation.GetResource()->GetResidencyHandles().IsEmpty());

#if D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
		// Not implemented for persistent SBT yet (CBV needs to be managed per record then instead of at SBT level)
		//
		// CBV also has pointer to stack local ResourceLocation which it shouldn't touch anymore after creation but
		// will still have dangling pointer - might be better to always allocate ResourceLocation as well on heap next to CBV (like persistent path)
		check(Lifetime == ERayTracingShaderBindingTableLifetime::Transient);
		ConstantBufferView = new FD3D12ConstantBufferView(Device, nullptr);
		ConstantBufferView->CreateView(&ResourceLocation, 0, Align(LooseParameterDataSize, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT));
		WorkerData[WorkerIndex].LooseParameterCBVs.Add(ConstantBufferView);
#endif // D3D12RHI_USE_CONSTANT_BUFFER_VIEWS

		if (Lifetime == ERayTracingShaderBindingTableLifetime::Persistent)
		{
			// Move ownership of the upload memory to the record data when persistent (otherwise auto freed next 'frame')
			FRecordData& RecordData = HitRecordData[RecordIndex];
			if (BindingType == ERayTracingLocalShaderBindingType::Persistent)
			{
				check(RecordData.LooseParameterResourceLocation == nullptr);
				RecordData.LooseParameterResourceLocation = new FD3D12ResourceLocation(Device);
				FD3D12ResourceLocation::TransferOwnership(*RecordData.LooseParameterResourceLocation, ResourceLocation);
#if DO_CHECK
				// Keep a CPU copy so later validation / transient re-binds can be compared against it
				RecordData.LooseParameterData.SetNum(LooseParameterDataSize);
				FMemory::Memcpy(RecordData.LooseParameterData.GetData(), LooseParameterData, LooseParameterDataSize);
#endif // DO_CHECK
			}
			// If we had loose parameter resource location allocated then it means it was persistent and now temporarily transient
			// Update the cached loose parameter GPU allocation to make sure the SBT doesn't contain any dangling upload memory allocations
			else if (RecordData.LooseParameterResourceLocation)
			{
#if DO_CHECK
				check(RecordData.LooseParameterData.Num() == LooseParameterDataSize);
				check(FMemory::Memcmp(RecordData.LooseParameterData.GetData(), LooseParameterData, LooseParameterDataSize) == 0);
#endif // DO_CHECK
				RecordData.LooseParameterResourceLocation->Clear();
				FD3D12ResourceLocation::TransferOwnership(*RecordData.LooseParameterResourceLocation, ResourceLocation);
			}
		}
	}
	return ConstantBufferView;
}
/**
 * Releases everything tracked for a hit record and returns the record to its default state.
 * Does nothing unless the shader table is persistent.
 *
 * @param WorkerIndex  Worker slot that receives the resource reference updates made while releasing listeners.
 * @param RecordIndex  Hit record to clear.
 */
void ClearHitRecordData(uint32 WorkerIndex, uint32 RecordIndex)
{
	const bool bIsPersistentTable = (Lifetime == ERayTracingShaderBindingTableLifetime::Persistent);
	if (!bIsPersistentTable)
	{
		return;
	}

	FRecordData& RecordData = HitRecordData[RecordIndex];

	// Uniform buffer listeners are owned by the record: unregister and destroy them
	for (FRecordUpdateUniformBufferListener* UniformBufferListener : RecordData.UpdateUniformBufferListeners)
	{
		UniformBufferListener->Reset();
		delete UniformBufferListener;
	}

	// Deleting a null pointer is a no-op, so no explicit null test is needed
	delete RecordData.LooseParameterResourceLocation;
	RecordData.LooseParameterResourceLocation = nullptr;

	// The geometry listener lives inside the record and only has to be unregistered
	if (const FD3D12RayTracingGeometry* TrackedGeometry = RecordData.UpdateGeometryListener.Geometry)
	{
		TrackedGeometry->RemoveUpdateListener(&RecordData.UpdateGeometryListener);
	}

#if ENABLE_RESIDENCY_MANAGEMENT
	for (FRecordUpdateShaderResourceRenameListener* RenameListener : RecordData.ReferencedShaderResourceListeners)
	{
		RenameListener->Reset(WorkerIndex);
		delete RenameListener;
	}

	for (FRecordUpdateTextureReferenceReplaceListener* ReplaceListener : RecordData.ReferencedTextureReferenceListeners)
	{
		ReplaceListener->Reset(WorkerIndex);
		delete ReplaceListener;
	}
#endif // ENABLE_RESIDENCY_MANAGEMENT

	// Reset record to default state (also empties the listener arrays whose entries were deleted above)
	RecordData.Reset();
}
/**
 * Removes Listener from the record's uniform buffer listener list and deletes it.
 * The listener is expected to be registered for the record (asserted).
 */
void RemoveUpdateUniformBufferListener(uint32 RecordIndex, FRecordUpdateUniformBufferListener* Listener)
{
	TArray<FRecordUpdateUniformBufferListener*>& RecordListeners = HitRecordData[RecordIndex].UpdateUniformBufferListeners;

	// Find() returns the first matching entry, or INDEX_NONE when the listener is not registered
	const int32 ListenerIndex = RecordListeners.Find(Listener);
	const bool bFound = (ListenerIndex != INDEX_NONE);
	if (bFound)
	{
		RecordListeners.RemoveAt(ListenerIndex);
		delete Listener;
	}

	check(bFound);
}
/**
 * Detaches the geometry listener of a record by resetting it to its unbound state.
 * Listener must be the record's own, currently bound geometry listener (asserted).
 */
void RemoveUpdateRayTracingGeometryListener(uint32 RecordIndex, FRecordUpdateRayTracingGeometryListener* Listener)
{
	FRecordData& RecordData = HitRecordData[RecordIndex];
	check(&RecordData.UpdateGeometryListener == Listener && RecordData.UpdateGeometryListener.Geometry != nullptr);

	FRecordUpdateRayTracingGeometryListener& BoundListener = RecordData.UpdateGeometryListener;
	BoundListener.Reset();
}
/**
 * Key of the shader record cache: a shader index plus the set of uniform buffers bound to it.
 * The hash is computed over the uniform buffer pointers only; the shader index takes part in the
 * equality comparison but not in the hash.
 */
struct FShaderRecordCacheKey
{
	static constexpr uint32 MaxUniformBuffers = 6;

	// NOTE(review): the element type is declared as FRHIUniformBuffer* const*, but the constructor copies
	// FRHIUniformBuffer* values into it. Both are pointer sized so hashing and comparison work - confirm
	// whether FRHIUniformBuffer* was intended.
	// Only the first NumUniformBuffers entries are initialized.
	FRHIUniformBuffer* const* UniformBuffers[MaxUniformBuffers];
	uint64 Hash = 0;
	uint32 NumUniformBuffers = 0;
	uint32 ShaderIndex = 0;

	FShaderRecordCacheKey() = default;

	/**
	 * @param InNumUniformBuffers  Number of entries in InUniformBuffers; must not exceed MaxUniformBuffers (asserted, clamped otherwise).
	 * @param InUniformBuffers     Uniform buffer pointers to copy into the key.
	 * @param InShaderIndex        Index of the shader the record is for.
	 */
	FShaderRecordCacheKey(uint32 InNumUniformBuffers, FRHIUniformBuffer* const* InUniformBuffers, uint32 InShaderIndex)
		: NumUniformBuffers(FMath::Min(MaxUniformBuffers, InNumUniformBuffers))
		, ShaderIndex(InShaderIndex)
	{
		check(InNumUniformBuffers <= MaxUniformBuffers);

		const uint64 DataSizeInBytes = sizeof(FRHIUniformBuffer*) * NumUniformBuffers;
		FMemory::Memcpy(UniformBuffers, InUniformBuffers, DataSizeInBytes);
		Hash = FXxHash64::HashBuffer(UniformBuffers, DataSizeInBytes).Hash;
	}

	bool operator == (const FShaderRecordCacheKey& Other) const
	{
		// Cheap scalar comparisons first
		const bool bScalarsMatch =
			Hash == Other.Hash &&
			ShaderIndex == Other.ShaderIndex &&
			NumUniformBuffers == Other.NumUniformBuffers;
		if (!bScalarsMatch)
		{
			return false;
		}

		for (uint32 BufferIndex = 0; BufferIndex < NumUniformBuffers; ++BufferIndex)
		{
			if (UniformBuffers[BufferIndex] != Other.UniformBuffers[BufferIndex])
			{
				return false;
			}
		}

		return true;
	}

	// Truncates the 64 bit hash to the 32 bit value expected by the containers
	friend uint32 GetTypeHash(const FShaderRecordCacheKey& Key)
	{
		return uint32(Key.Hash);
	}
};
/**
 * Updates residency on the command context for every resource referenced by this shader table
 * and for the shader table buffer itself.
 * The work is skipped when this table was already registered on the command context.
 */
void UpdateResidency(FD3D12CommandContext& CommandContext) const
{
	// Skip redundant resource residency updates when a shader table is repeatedly used on the same command list
	bool bAlreadyRegistered = false;
	CommandContext.RayTracingShaderTables.FindOrAdd(UniqueId, bAlreadyRegistered);
	if (bAlreadyRegistered)
	{
		return;
	}

	TRACE_CPUPROFILER_EVENT_SCOPE(ShaderTableUpdateResidency);

	for (FD3D12Resource* ReferencedResource : ReferencedResources)
	{
		CommandContext.UpdateResidency(ReferencedResource);
	}

	FD3D12Resource* TableResource = Buffer->GetResource();
	CommandContext.UpdateResidency(TableResource);
}
// Per-worker state used while binding records. One instance exists per binding worker slot and the
// struct is aligned to the platform cache line size.
struct alignas(PLATFORM_CACHE_LINE_SIZE) FWorkerThreadData
{
// Cache keyed by shader index + bound uniform buffers (see FShaderRecordCacheKey).
Experimental::TSherwoodMap<FShaderRecordCacheKey, uint32> ShaderRecordCache;
// Highest hit record index written through this worker slot, -1 when none has been written.
int32 MaxUsedHitRecordIndex = -1;
#if ENABLE_RESIDENCY_MANAGEMENT
// Track all persistent resources with number of refs across all records (when ref reaches 0 then it doesn't need to be tracked anymore)
TMap<TRefCountPtr<FD3D12Resource>, int32> PersistentReferencedResources;
// Track last reference to base shader resources because PersistentReferencedResources could have the last reference to the d3d12 resource
// and we can't delete d3d12baseshaderresource with committed resource when it doesn't hold the last reference to it
TSet<TRefCountPtr<FD3D12BaseShaderResource>> LastTrackedBaseShaderResources;
// A set of all resources referenced by this shader table for the purpose of updating residency before ray tracing work dispatch.
// The set is only used to de-duplicate entries added to DynamicReferencedResources (see AddDynamicReferencedResource).
Experimental::TSherwoodSet<void*> DynamicReferencedResourceSet;
TArray<TRefCountPtr<FD3D12Resource>> DynamicReferencedResources;
#endif // ENABLE_RESIDENCY_MANAGEMENT
#if D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
// Constant buffer views created for loose parameter data (see SetLooseParameterData).
TArray<FD3D12ConstantBufferView*> LooseParameterCBVs;
#endif // D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
};
// Per-worker state, indexed by the WorkerIndex passed to the binding functions.
FWorkerThreadData WorkerData[MaxBindingWorkers];
// Identifies this shader table in FD3D12CommandContext::RayTracingShaderTables (see UpdateResidency).
const uint64 UniqueId;
// NOTE(review): not used in the visible part of the class - presumably serializes dispatches that use this table; confirm against callers.
UE::FMutex DispatchMutex;
private:
// NOTE(review): presumably the source of UniqueId values; the constructor is not visible here - confirm.
static std::atomic_uint64_t NextUniqueId;
};
// Out-of-class definition of the static id counter; it starts at zero.
std::atomic_uint64_t FD3D12RayTracingShaderBindingTableInternal::NextUniqueId = 0;
/**
 * Creates additional ("specialized") ray tracing pipeline state objects, one per non-empty performance-group bucket.
 *
 * Every specialized RTPSO contains all non-raygen shader collections plus only those ray generation collections
 * that were sorted into the corresponding bucket. Raygen shaders that land in bucket 0 get no specialization.
 *
 * @param RayTracingDevice            Device used to create the state objects.
 * @param GlobalRootSignature         Global root signature passed to every created state object.
 * @param MaxAttributeSizeInBytes     Forwarded to CreateRayTracingStateObject.
 * @param MaxPayloadSizeInBytes       Forwarded to CreateRayTracingStateObject.
 * @param RayGenShaders               Ray generation shader library; its shader count sizes OutSpecializationIndices.
 * @param UniqueShaderCollections     Compiled shader collections; all raygen entries must come first.
 * @param RayGenShaderIndexByHash     Maps a raygen shader hash to its index in RayGenShaders.
 * @param OutSpecializedStateObjects  Receives one state object per non-empty bucket.
 * @param OutSpecializationIndices    Per raygen shader: index into OutSpecializedStateObjects, or INDEX_NONE.
 *                                    Left untouched when there is at most one raygen shader.
 */
static void CreateSpecializedStateObjects(
	ID3D12Device5* RayTracingDevice,
	ID3D12RootSignature* GlobalRootSignature,
	uint32 MaxAttributeSizeInBytes,
	uint32 MaxPayloadSizeInBytes,
	const FD3D12RayTracingShaderLibrary& RayGenShaders,
	const TArray<FD3D12RayTracingPipelineCache::FEntry*>& UniqueShaderCollections,
	const TMap<FSHAHash, int32>& RayGenShaderIndexByHash,
	TArray<TRefCountPtr<ID3D12StateObject>>& OutSpecializedStateObjects,
	TArray<int32>& OutSpecializationIndices)
{
	static constexpr uint32 MaxSpecializationBuckets = FD3D12RayTracingPipelineInfo::MaxPerformanceGroups;

	if (RayGenShaders.Shaders.Num() <= 1)
	{
		// No specializations needed
		return;
	}

	// Initialize raygen shader PSO specialization map to default values
	OutSpecializationIndices.Reserve(RayGenShaders.Shaders.Num());
	for (int32 It = 0; It < RayGenShaders.Shaders.Num(); ++It)
	{
		OutSpecializationIndices.Add(INDEX_NONE);
	}

	struct FRayGenShaderSpecialization
	{
		D3D12_EXISTING_COLLECTION_DESC Desc = {};
		int32 ShaderIndex = INDEX_NONE;
	};

	TArray<FRayGenShaderSpecialization> RayGenShaderCollectionBuckets[MaxSpecializationBuckets];
	TArray<D3D12_EXISTING_COLLECTION_DESC> ShaderCollectionDescs;

	// Find useful performance group range for non-raygen shaders.
	// It is not necessary to create PSO specializations for high-occupancy RGS if overall PSO will be limited by low-occupancy hit shaders.
	// Also not necessary to create specializations if all raygen shaders are already in the same group.
	uint32 MaxPerformanceGroupRGS = 0;
	uint32 MinPerformanceGroupRGS = MaxSpecializationBuckets - 1;
	uint32 MaxPerformanceGroupOther = 0;
	uint32 MinPerformanceGroupOther = MaxSpecializationBuckets - 1;
	int32 LastRayGenShaderCollectionIndex = INDEX_NONE;

	for (int32 EntryIndex = 0; EntryIndex < UniqueShaderCollections.Num(); ++EntryIndex)
	{
		FD3D12RayTracingPipelineCache::FEntry* Entry = UniqueShaderCollections[EntryIndex];

		// Clamp to the last valid bucket index. Clamping to MaxSpecializationBuckets (one past the end) would let
		// out-of-range groups look different from the last bucket and defeat the "same group" early-out below.
		const uint32 Group = FMath::Min<uint32>(Entry->PipelineInfo.PerformanceGroup, MaxSpecializationBuckets - 1);

		if (Entry->CollectionType == FD3D12RayTracingPipelineCache::ECollectionType::RayGen)
		{
			MaxPerformanceGroupRGS = FMath::Max<uint32>(MaxPerformanceGroupRGS, Group);
			MinPerformanceGroupRGS = FMath::Min<uint32>(MinPerformanceGroupRGS, Group);
			LastRayGenShaderCollectionIndex = EntryIndex;
		}
		else
		{
			checkf(EntryIndex > LastRayGenShaderCollectionIndex, TEXT("Ray generation shaders are expected to be first in the UniqueShaderCollections list."));
			MaxPerformanceGroupOther = FMath::Max<uint32>(MaxPerformanceGroupOther, Group);
			MinPerformanceGroupOther = FMath::Min<uint32>(MinPerformanceGroupOther, Group);

			// This is a hit/miss/callable shader which will be common for all specialized RTPSOs.
			ShaderCollectionDescs.Add(Entry->GetCollectionDesc());
		}
	}

	if (MinPerformanceGroupRGS == MaxPerformanceGroupRGS)
	{
		// No need to create a specialized PSO if all raygen shaders are already in the same group
		return;
	}

	// Split RGS collections into a separate lists, organized by performance group
	for (int32 EntryIndex = 0; EntryIndex <= LastRayGenShaderCollectionIndex; ++EntryIndex)
	{
		FD3D12RayTracingPipelineCache::FEntry* Entry = UniqueShaderCollections[EntryIndex];
		check(Entry->CollectionType == FD3D12RayTracingPipelineCache::ECollectionType::RayGen);

		// Don't create specializations for raygen shaders that have better occupancy than worst non-raygen shader.
		// MinPerformanceGroupOther never exceeds MaxSpecializationBuckets - 1, so the bucket index stays in range.
		const uint32 SpecializationBucket = FMath::Min<uint32>(Entry->PipelineInfo.PerformanceGroup, MinPerformanceGroupOther);

		// Don't create extra specialized pipelines for group 0 (worst-performing) and just use the default RTPSO.
		if (SpecializationBucket > 0)
		{
			FRayGenShaderSpecialization Specialization;
			Specialization.Desc = Entry->GetCollectionDesc();
			Specialization.ShaderIndex = RayGenShaderIndexByHash.FindChecked(Entry->Shader->GetHash());
			RayGenShaderCollectionBuckets[SpecializationBucket].Add(Specialization);
		}
	}

	OutSpecializedStateObjects.Reserve(MaxSpecializationBuckets);

	// Number of shared (non-raygen) collections; the array is truncated back to this size after every bucket.
	const uint32 ShaderCollectionDescsSize = ShaderCollectionDescs.Num();

	for (const TArray<FRayGenShaderSpecialization>& SpecializationBucket : RayGenShaderCollectionBuckets)
	{
		if (SpecializationBucket.IsEmpty())
		{
			continue;
		}

		const int32 SpecializationIndex = OutSpecializedStateObjects.Num();

		for (const FRayGenShaderSpecialization& Specialization : SpecializationBucket)
		{
			// Temporarily add the RGSs to complete shader collection
			ShaderCollectionDescs.Add(Specialization.Desc);
			OutSpecializationIndices[Specialization.ShaderIndex] = SpecializationIndex;
		}

		TRefCountPtr<ID3D12StateObject> SpecializedPSO = CreateRayTracingStateObject(
			RayTracingDevice,
			{}, // Libraries,
			{}, // LibraryExports,
			MaxAttributeSizeInBytes,
			MaxPayloadSizeInBytes,
			{}, // HitGroups
			GlobalRootSignature,
			{}, // LocalRootSignatures
			{}, // LocalRootSignatureAssociations,
			ShaderCollectionDescs,
			D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE);

		OutSpecializedStateObjects.Add(SpecializedPSO);

		// Remove the temporary RGSs
		ShaderCollectionDescs.SetNum(ShaderCollectionDescsSize);
	}
}
/**
 * Builds a D3D12 ray tracing pipeline state object (RTPSO) from an RHI initializer.
 *
 * Steps, in order:
 *  1. Acquire (compile or fetch from the pipeline cache) a shader collection for every raygen, miss,
 *     hit group and callable shader, tracking the unique ones.
 *  2. Wait for all compilation tasks. Partial pipelines stop here.
 *  3. Link the final state object, either by extending the base pipeline (AddToStateObject)
 *     or by creating a new state object from all unique collections.
 *  4. Optionally create per-performance-group specialized state objects.
 *  5. Query shader identifiers and the pipeline stack size, then gather and log creation stats.
 *
 * @param Device       Device that owns the pipeline cache and D3D12 device interfaces.
 * @param Initializer  Shader tables, size limits and flags (bPartial, bBackgroundCompilation, BasePipeline, ...).
 */
FD3D12RayTracingPipelineState::FD3D12RayTracingPipelineState(FD3D12Device* Device, const FRayTracingPipelineStateInitializer& Initializer) :
	FRHIRayTracingPipelineState(Initializer),
	Device(Device)
{
	SCOPE_CYCLE_COUNTER(STAT_RTPSO_CreatePipeline);
	TRACE_CPUPROFILER_EVENT_SCOPE(RTPSO_CreatePipeline);

	checkf(Initializer.GetRayGenTable().Num() > 0 || Initializer.bPartial, TEXT("Ray tracing pipelines must have at leat one ray generation shader."));
	checkf(Initializer.bPartial || Initializer.GetHitGroupTable().Num() > 0, TEXT("Ray tracing pipelines must have at leat one hit shader."));

	// Timers accumulate cycle deltas: subtract the start timestamp now, add the end timestamp later.
	uint64 TotalCreationTime = 0;
	uint64 CompileTime = 0;
	uint64 LinkTime = 0;
	uint32 NumCacheHits = 0;

	TotalCreationTime -= FPlatformTime::Cycles64();

	ID3D12Device5* RayTracingDevice = Device->GetDevice5();

	TArrayView<FRHIRayTracingShader*> InitializerHitGroups = Initializer.GetHitGroupTable();
	TArrayView<FRHIRayTracingShader*> InitializerMissShaders = Initializer.GetMissTable();
	TArrayView<FRHIRayTracingShader*> InitializerRayGenShaders = Initializer.GetRayGenTable();
	TArrayView<FRHIRayTracingShader*> InitializerCallableShaders = Initializer.GetCallableTable();

	FRHIShaderBindingLayout ShaderBindingLayout = Initializer.ShaderBindingLayout ? *Initializer.ShaderBindingLayout : FRHIShaderBindingLayout();

	const uint32 MaxTotalShaders = InitializerRayGenShaders.Num() + InitializerMissShaders.Num() + InitializerHitGroups.Num() + InitializerCallableShaders.Num();
	checkf(MaxTotalShaders >= 1, TEXT("Ray tracing pipelines are expected to contain at least one shader"));

	FD3D12RayTracingPipelineCache* PipelineCache = Device->GetRayTracingPipelineCache();

	// All shaders in this pipeline are compiled against the same global root signature
	GlobalRootSignature = PipelineCache->GetGlobalRootSignature(ShaderBindingLayout);

	const FD3D12RayTracingPipelineState* BasePipeline = GRHISupportsRayTracingPSOAdditions
		? FD3D12DynamicRHI::ResourceCast(Initializer.BasePipeline.GetReference())
		: nullptr;

	if (BasePipeline)
	{
		// Start from the base pipeline's shader set so that only new shaders are treated as unique below.
		PipelineShaderHashes = BasePipeline->PipelineShaderHashes;
	}
	PipelineShaderHashes.Reserve(MaxTotalShaders);

	TArray<FD3D12RayTracingPipelineCache::FEntry*> UniqueShaderCollections;
	UniqueShaderCollections.Reserve(MaxTotalShaders);

	FGraphEventArray CompileCompletionList;
	CompileCompletionList.Reserve(MaxTotalShaders);

	// Helper function to acquire a D3D12_EXISTING_COLLECTION_DESC for a compiled shader via cache
	auto AddShaderCollection = [Device, ShaderBindingLayoutHash = ShaderBindingLayout.GetHash(), GlobalRootSignature = this->GlobalRootSignature, PipelineCache,
		&UniqueShaderHashes = this->PipelineShaderHashes, &UniqueShaderCollections, &Initializer, &NumCacheHits, &CompileTime,
		&CompileCompletionList]
		(FD3D12RayTracingShader* Shader, FD3D12RayTracingPipelineCache::ECollectionType CollectionType)
	{
		// verify that that the same shader binding layout is used for all shaders in the RTPSO or not sampling any resources
		uint32 TotalResourceCount = Shader->ResourceCounts.NumCBs + Shader->ResourceCounts.NumSRVs + Shader->ResourceCounts.NumUAVs + Shader->ResourceCounts.NumSamplers;
		if (TotalResourceCount != 0 && Shader->ShaderBindingLayoutHash != ShaderBindingLayoutHash)
		{
			UE_LOG(LogD3D12RHI, Warning, TEXT("Raytracing shader with with entry point %s, name %s and ShaderBindingLayout hash %u doesn't match the RTPSO ShaderBindingLayout hash %u."),
				*Shader->EntryPoint, Shader->GetShaderName(), Shader->ShaderBindingLayoutHash, ShaderBindingLayoutHash);
			ensure(false);
		}

		bool bIsAlreadyInSet = false;
		const uint64 ShaderHash = GetShaderHash64(Shader);
		UniqueShaderHashes.Add(ShaderHash, &bIsAlreadyInSet);

		bool bCacheHit = false;

		CompileTime -= FPlatformTime::Cycles64();

		FD3D12RayTracingPipelineCache::FEntry* ShaderCacheEntry = PipelineCache->GetOrCompileShader(
			Device, Shader, GlobalRootSignature,
			Initializer.MaxAttributeSizeInBytes,
			Initializer.MaxPayloadSizeInBytes,
			!Initializer.bPartial, //< partial PSOs are not required
			CollectionType, CompileCompletionList,
			&bCacheHit);

		CompileTime += FPlatformTime::Cycles64();

		// Only shaders that were not already part of the pipeline (or its base) contribute a new collection.
		if (!bIsAlreadyInSet)
		{
			UniqueShaderCollections.Add(ShaderCacheEntry);

			if (bCacheHit) NumCacheHits++;
		}

		return ShaderCacheEntry;
	};

	// Add ray generation shaders

	TArray<FD3D12RayTracingPipelineCache::FEntry*> RayGenShaderEntries;
	RayGenShaders.Reserve(InitializerRayGenShaders.Num());
	RayGenShaderEntries.Reserve(InitializerRayGenShaders.Num());
	TMap<FSHAHash, int32> RayGenShaderIndexByHash;

	checkf(UniqueShaderCollections.Num() == 0, TEXT("Ray generation shaders are expected to be first in the UniqueShaderCollections list."));

	for (FRHIRayTracingShader* ShaderRHI : InitializerRayGenShaders)
	{
		FD3D12RayTracingShader* Shader = FD3D12DynamicRHI::ResourceCast(ShaderRHI);
		// Same null validation as the miss / hit group / callable tables, before the shader is dereferenced.
		checkf(Shader, TEXT("A valid ray tracing shader must be provided for all elements in the FRayTracingPipelineStateInitializer ray generation shader table."));
		checkf(!Shader->UsesGlobalUniformBuffer(), TEXT("Global uniform buffers are not implemented for ray generation shaders"));

		FD3D12RayTracingPipelineCache::FEntry* ShaderCacheEntry = AddShaderCollection(Shader, FD3D12RayTracingPipelineCache::ECollectionType::RayGen);

		RayGenShaderEntries.Add(ShaderCacheEntry);
		RayGenShaderIndexByHash.Add(Shader->GetHash(), RayGenShaders.Shaders.Num());
		RayGenShaders.Shaders.Add(Shader);
	}

	MaxHitGroupViewDescriptors = 0;
	MaxLocalRootSignatureSize = 0;

	// Add miss shaders

	TArray<FD3D12RayTracingPipelineCache::FEntry*> MissShaderEntries;
	MissShaders.Reserve(InitializerMissShaders.Num());
	MissShaderEntries.Reserve(InitializerMissShaders.Num());

	for (FRHIRayTracingShader* ShaderRHI : InitializerMissShaders)
	{
		FD3D12RayTracingShader* Shader = FD3D12DynamicRHI::ResourceCast(ShaderRHI);

		checkf(Shader, TEXT("A valid ray tracing shader must be provided for all elements in the FRayTracingPipelineStateInitializer miss shader table."));
		checkf(!Shader->UsesGlobalUniformBuffer(), TEXT("Global uniform buffers are not implemented for ray tracing miss shaders"));

		const uint32 ShaderViewDescriptors = Shader->ResourceCounts.NumSRVs + Shader->ResourceCounts.NumUAVs;
		MaxHitGroupViewDescriptors = FMath::Max(MaxHitGroupViewDescriptors, ShaderViewDescriptors);
		MaxLocalRootSignatureSize = FMath::Max(MaxLocalRootSignatureSize, Shader->LocalRootSignature->GetTotalRootSignatureSizeInBytes());

		FD3D12RayTracingPipelineCache::FEntry* ShaderCacheEntry = AddShaderCollection(Shader, FD3D12RayTracingPipelineCache::ECollectionType::Miss);

		MissShaderEntries.Add(ShaderCacheEntry);
		MissShaders.Shaders.Add(Shader);
	}

	// Add hit groups

	TArray<FD3D12RayTracingPipelineCache::FEntry*> HitGroupEntries;
	HitGroupShaders.Reserve(InitializerHitGroups.Num());
	HitGroupEntries.Reserve(InitializerHitGroups.Num());

	for (FRHIRayTracingShader* ShaderRHI : InitializerHitGroups)
	{
		FD3D12RayTracingShader* Shader = FD3D12DynamicRHI::ResourceCast(ShaderRHI);

		checkf(Shader, TEXT("A valid ray tracing hit group shader must be provided for all elements in the FRayTracingPipelineStateInitializer hit group table."));

		const uint32 ShaderViewDescriptors = Shader->ResourceCounts.NumSRVs + Shader->ResourceCounts.NumUAVs;
		MaxHitGroupViewDescriptors = FMath::Max(MaxHitGroupViewDescriptors, ShaderViewDescriptors);
		MaxLocalRootSignatureSize = FMath::Max(MaxLocalRootSignatureSize, Shader->LocalRootSignature->GetTotalRootSignatureSizeInBytes());

		FD3D12RayTracingPipelineCache::FEntry* ShaderCacheEntry = AddShaderCollection(Shader, FD3D12RayTracingPipelineCache::ECollectionType::HitGroup);

		HitGroupEntries.Add(ShaderCacheEntry);
		HitGroupShaders.Shaders.Add(Shader);
	}

	// Add callable shaders

	TArray<FD3D12RayTracingPipelineCache::FEntry*> CallableShaderEntries;
	CallableShaders.Reserve(InitializerCallableShaders.Num());
	CallableShaderEntries.Reserve(InitializerCallableShaders.Num());

	for (FRHIRayTracingShader* ShaderRHI : InitializerCallableShaders)
	{
		FD3D12RayTracingShader* Shader = FD3D12DynamicRHI::ResourceCast(ShaderRHI);

		checkf(Shader, TEXT("A valid ray tracing shader must be provided for all elements in the FRayTracingPipelineStateInitializer callable shader table."));
		checkf(!Shader->UsesGlobalUniformBuffer(), TEXT("Global uniform buffers are not implemented for ray tracing callable shaders"));

		const uint32 ShaderViewDescriptors = Shader->ResourceCounts.NumSRVs + Shader->ResourceCounts.NumUAVs;
		MaxHitGroupViewDescriptors = FMath::Max(MaxHitGroupViewDescriptors, ShaderViewDescriptors);
		MaxLocalRootSignatureSize = FMath::Max(MaxLocalRootSignatureSize, Shader->LocalRootSignature->GetTotalRootSignatureSizeInBytes());

		FD3D12RayTracingPipelineCache::FEntry* ShaderCacheEntry = AddShaderCollection(Shader, FD3D12RayTracingPipelineCache::ECollectionType::Callable);

		CallableShaderEntries.Add(ShaderCacheEntry);
		CallableShaders.Shaders.Add(Shader);
	}

	check(Initializer.GetMaxLocalBindingDataSize() >= MaxLocalRootSignatureSize);

	// Wait for all compilation tasks to be complete and then gather the compiled collection descriptors

	CompileTime -= FPlatformTime::Cycles64();

	FTaskGraphInterface::Get().WaitUntilTasksComplete(CompileCompletionList);

	CompileTime += FPlatformTime::Cycles64();

	if (Initializer.bPartial)
	{
		// Partial pipelines don't have a linking phase, so exit immediately after compilation tasks are complete.
		return;
	}

	TArray<D3D12_EXISTING_COLLECTION_DESC> UniqueShaderCollectionDescs;
	UniqueShaderCollectionDescs.Reserve(MaxTotalShaders);
	for (FD3D12RayTracingPipelineCache::FEntry* Entry : UniqueShaderCollections)
	{
		UniqueShaderCollectionDescs.Add(Entry->GetCollectionDesc());
	}

	// Link final RTPSO from shader collections

	LinkTime -= FPlatformTime::Cycles64();

	// Extending RTPSOs is currently not compatible with PSO specializations
	if (BasePipeline && GRayTracingSpecializeStateObjects == 0)
	{
		if (UniqueShaderCollectionDescs.Num() == 0)
		{
			// New PSO does not actually have any new shaders that were not in the base
			StateObject = BasePipeline->StateObject.GetReference();
		}
		else
		{
			TArray<D3D12_STATE_SUBOBJECT> Subobjects;

			int32 SubobjectIndex = 0;
			Subobjects.Reserve(UniqueShaderCollectionDescs.Num() + 1);

			D3D12_STATE_OBJECT_CONFIG StateObjectConfig = {};
			StateObjectConfig.Flags = D3D12_STATE_OBJECT_FLAG_ALLOW_STATE_OBJECT_ADDITIONS;
			Subobjects.Add(D3D12_STATE_SUBOBJECT{ D3D12_STATE_SUBOBJECT_TYPE_STATE_OBJECT_CONFIG, &StateObjectConfig });

			// Subobjects store pointers into UniqueShaderCollectionDescs, which outlives the AddToStateObject call below.
			for (const D3D12_EXISTING_COLLECTION_DESC& Collection : UniqueShaderCollectionDescs)
			{
				Subobjects.Add(D3D12_STATE_SUBOBJECT{ D3D12_STATE_SUBOBJECT_TYPE_EXISTING_COLLECTION, &Collection });
			}

			D3D12_STATE_OBJECT_DESC Desc = {};
			Desc.Type = D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE;
			Desc.NumSubobjects = Subobjects.Num();
			Desc.pSubobjects = Subobjects.GetData();

			ID3D12Device7* Device7 = Device->GetDevice7();

			VERIFYD3D12RESULT(Device7->AddToStateObject(&Desc,
				BasePipeline->StateObject.GetReference(),
				IID_PPV_ARGS(StateObject.GetInitReference())));
		}
	}
	else
	{
		StateObject = CreateRayTracingStateObject(
			RayTracingDevice,
			{}, // Libraries,
			{}, // LibraryExports,
			Initializer.MaxAttributeSizeInBytes,
			Initializer.MaxPayloadSizeInBytes,
			{}, // HitGroups
			GlobalRootSignature,
			{}, // LocalRootSignatures
			{}, // LocalRootSignatureAssociations,
			UniqueShaderCollectionDescs,
			D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE);

		if (StateObject == nullptr)
		{
			UE_LOG(LogD3D12RHI, Fatal, TEXT("Failed to a create raytracing pipeline state"));
		}
	}

	if (GRayTracingSpecializeStateObjects != 0 && Initializer.GetRayGenTable().Num() > 1)
	{
		CreateSpecializedStateObjects(
			RayTracingDevice,
			GlobalRootSignature,
			Initializer.MaxAttributeSizeInBytes,
			Initializer.MaxPayloadSizeInBytes,
			RayGenShaders,
			UniqueShaderCollections,
			RayGenShaderIndexByHash,
			SpecializedStateObjects, // out param
			SpecializationIndices // out param
		);
	}

	LinkTime += FPlatformTime::Cycles64();

	HRESULT QueryInterfaceResult = StateObject->QueryInterface(IID_PPV_ARGS(PipelineProperties.GetInitReference()));
	checkf(SUCCEEDED(QueryInterfaceResult), TEXT("Failed to query pipeline properties from the ray tracing pipeline state object. Result=%08x"), QueryInterfaceResult);

	// Query shader identifiers from the pipeline state object

	check(HitGroupEntries.Num() == InitializerHitGroups.Num());

	// Prefer the identifier already stored on the cache entry; otherwise look it up by primary export name.
	auto GetEntryShaderIdentifier = [Properties = PipelineProperties.GetReference()](FD3D12RayTracingPipelineCache::FEntry* Entry) -> FD3D12ShaderIdentifier
	{
		if (Entry->Identifier.IsValid())
		{
			return Entry->Identifier;
		}
		else
		{
			return GetShaderIdentifier(Properties, Entry->GetPrimaryExportNameChars());
		}
	};

	HitGroupShaders.Identifiers.SetNumUninitialized(InitializerHitGroups.Num());
	for (int32 HitGroupIndex = 0; HitGroupIndex < HitGroupEntries.Num(); ++HitGroupIndex)
	{
		HitGroupShaders.Identifiers[HitGroupIndex] = GetEntryShaderIdentifier(HitGroupEntries[HitGroupIndex]);
	}

	RayGenShaders.Identifiers.SetNumUninitialized(RayGenShaderEntries.Num());
	for (int32 ShaderIndex = 0; ShaderIndex < RayGenShaderEntries.Num(); ++ShaderIndex)
	{
		RayGenShaders.Identifiers[ShaderIndex] = GetEntryShaderIdentifier(RayGenShaderEntries[ShaderIndex]);
	}

	MissShaders.Identifiers.SetNumUninitialized(MissShaderEntries.Num());
	for (int32 ShaderIndex = 0; ShaderIndex < MissShaderEntries.Num(); ++ShaderIndex)
	{
		MissShaders.Identifiers[ShaderIndex] = GetEntryShaderIdentifier(MissShaderEntries[ShaderIndex]);
	}

	CallableShaders.Identifiers.SetNumUninitialized(CallableShaderEntries.Num());
	for (int32 ShaderIndex = 0; ShaderIndex < CallableShaderEntries.Num(); ++ShaderIndex)
	{
		CallableShaders.Identifiers[ShaderIndex] = GetEntryShaderIdentifier(CallableShaderEntries[ShaderIndex]);
	}

	PipelineStackSize = PipelineProperties->GetPipelineStackSize();

	TotalCreationTime += FPlatformTime::Cycles64();

	// Report stats for pipelines that take a long time to create

#if !NO_LOGGING

	// Gather PSO stats
	ShaderStats.Reserve(UniqueShaderCollections.Num());
	for (FD3D12RayTracingPipelineCache::FEntry* Entry : UniqueShaderCollections)
	{
		FShaderStats Stats;
		Stats.Name = *(Entry->Shader->EntryPoint);
		Stats.ShaderSize = Entry->Shader->Code.Num();
		Stats.CompileTimeMS = Entry->CompileTimeMS;

#if PLATFORM_WINDOWS
		if (Entry->Shader->GetFrequency() == SF_RayGen)
		{
			Stats.StackSize = uint32(PipelineProperties->GetShaderStackSize(*(Entry->ExportNames[0])));
		}
#endif // PLATFORM_WINDOWS

		ShaderStats.Add(Stats);
	}

	// Slowest-to-compile shaders first
	ShaderStats.Sort([](const FShaderStats& A, const FShaderStats& B) { return B.CompileTimeMS < A.CompileTimeMS; });

	const double TotalCreationTimeMS = 1000.0 * FPlatformTime::ToSeconds64(TotalCreationTime);

	// log a message if the RTPSO creating took a long time (with a higher threshold if this was a background compilation)
	const double CreationTimeWarningThresholdMS = 10.0;
	const double BackgroundCreatingTimeWarningThresholdMS = 1000.0;
	const double WarningThresholdMS = Initializer.bBackgroundCompilation
		? BackgroundCreatingTimeWarningThresholdMS
		: CreationTimeWarningThresholdMS;
	if (TotalCreationTimeMS > WarningThresholdMS)
	{
		const double CompileTimeMS = 1000.0 * FPlatformTime::ToSeconds64(CompileTime);
		const double LinkTimeMS = 1000.0 * FPlatformTime::ToSeconds64(LinkTime);
		const uint32 NumUniqueShaders = UniqueShaderCollections.Num();
		UE_LOG(LogD3D12RHI, Log,
			TEXT("Creating RTPSO with %d shaders (%d cached, %d new) took %.2f ms. Compile time %.2f ms, link time %.2f ms."),
			PipelineShaderHashes.Num(), NumCacheHits, NumUniqueShaders - NumCacheHits, (float)TotalCreationTimeMS, (float)CompileTimeMS, (float)LinkTimeMS);
	}

#endif //!NO_LOGGING
}
class FD3D12RayTracingShaderBindingTable : public FRHIShaderBindingTable, public FD3D12AdapterChild
{
public:
UE_NONCOPYABLE(FD3D12RayTracingShaderBindingTable)
FD3D12RayTracingShaderBindingTable(FRHICommandListBase& RHICmdList, FD3D12Adapter* Adapter, const FRayTracingShaderBindingTableInitializer& InInitializer)
: FRHIShaderBindingTable(InInitializer), FD3D12AdapterChild(Adapter)
{
INC_DWORD_STAT(STAT_D3D12RayTracingAllocatedSBT);
checkf(Initializer.NumMissShaderSlots >= 1, TEXT("Need at least 1 miss shader slot."));
for (FD3D12Device* Device : Adapter->GetDevices())
{
InitForDevice(RHICmdList, Device);
}
};
~FD3D12RayTracingShaderBindingTable()
{
for (auto& Table : ShaderTablesPerGPU)
{
delete Table;
Table = nullptr;
}
DEC_DWORD_STAT(STAT_D3D12RayTracingAllocatedSBT);
}
FD3D12RayTracingShaderBindingTableInternal* GetTableForDevice(FD3D12Device* Device)
{
const uint32 GPUIndex = Device->GetGPUIndex();
return ShaderTablesPerGPU[GPUIndex];
}
void ReleaseForDevice(FD3D12Device* Device)
{
const uint32 GPUIndex = Device->GetGPUIndex();
delete ShaderTablesPerGPU[GPUIndex];
ShaderTablesPerGPU[GPUIndex] = nullptr;
}
virtual FRHISizeAndStride GetInlineBindingDataSizeAndStride() const override final
{
// Size should be the same for all tables
if (ShaderTablesPerGPU[0])
{
return ShaderTablesPerGPU[0]->GetInlineBindingDataSizeAndStride();
}
return FRHISizeAndStride{0,0};
}
private:
void InitForDevice(FRHICommandListBase& RHICmdList, FD3D12Device* Device)
{
TRACE_CPUPROFILER_EVENT_SCOPE(ShaderTableInit);
SCOPE_CYCLE_COUNTER(STAT_D3D12CreateShaderTable);
const uint32 GPUIndex = Device->GetGPUIndex();
check(ShaderTablesPerGPU[GPUIndex] == nullptr);
ShaderTablesPerGPU[GPUIndex] = new FD3D12RayTracingShaderBindingTableInternal(RHICmdList, Initializer, Device);
}
FD3D12RayTracingShaderBindingTableInternal* ShaderTablesPerGPU[MAX_NUM_GPUS] = {};
};
void FD3D12Device::InitRayTracing()
{
LLM_SCOPE_BYNAME(TEXT("FD3D12RT"));
check(RayTracingPipelineCache == nullptr);
RayTracingPipelineCache = new FD3D12RayTracingPipelineCache(GetParentAdapter());
}
void FD3D12Device::CleanupRayTracing()
{
delete RayTracingPipelineCache;
RayTracingPipelineCache = nullptr;
for (FD3D12Queue& Queue : Queues)
{
delete Queue.RayTracingDispatchRaysDescBuffer;
Queue.RayTracingDispatchRaysDescBuffer = nullptr;
}
}
/**
 * Converts RHI ray tracing instance flags into the equivalent D3D12 instance flags.
 * Note that the winding flag is inverted: FRONT_COUNTERCLOCKWISE is set unless TriangleCullReverse is requested.
 */
static D3D12_RAYTRACING_INSTANCE_FLAGS TranslateRayTracingInstanceFlags(ERayTracingInstanceFlags InFlags)
{
	const auto IsSet = [InFlags](ERayTracingInstanceFlags Flag)
	{
		return EnumHasAnyFlags(InFlags, Flag);
	};

	D3D12_RAYTRACING_INSTANCE_FLAGS D3DFlags = D3D12_RAYTRACING_INSTANCE_FLAG_NONE;

	if (IsSet(ERayTracingInstanceFlags::TriangleCullDisable))
	{
		D3DFlags |= D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_CULL_DISABLE;
	}

	// Counterclockwise is the default for UE. Reversing culling is achieved by *not* setting this flag.
	const bool bReverseCulling = IsSet(ERayTracingInstanceFlags::TriangleCullReverse);
	if (!bReverseCulling)
	{
		D3DFlags |= D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_FRONT_COUNTERCLOCKWISE;
	}

	if (IsSet(ERayTracingInstanceFlags::ForceOpaque))
	{
		D3DFlags |= D3D12_RAYTRACING_INSTANCE_FLAG_FORCE_OPAQUE;
	}

	if (IsSet(ERayTracingInstanceFlags::ForceNonOpaque))
	{
		D3DFlags |= D3D12_RAYTRACING_INSTANCE_FLAG_FORCE_NON_OPAQUE;
	}

	return D3DFlags;
}
/**
 * Computes the memory requirements of a top-level acceleration structure with up to
 * Initializer.MaxNumInstances instances.
 *
 * @return The per-field maximum of the prebuild info reported by every explicit rendering GPU.
 */
FRayTracingAccelerationStructureSize FD3D12DynamicRHI::RHICalcRayTracingSceneSize(const FRayTracingSceneInitializer& Initializer)
{
	D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS TopLevelInputs = {};
	TopLevelInputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL;
	TopLevelInputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
	TopLevelInputs.NumDescs = Initializer.MaxNumInstances;
	TopLevelInputs.Flags = TranslateRayTracingAccelerationStructureFlags(Initializer.BuildFlags);

	FD3D12Adapter& Adapter = GetAdapter();

	FRayTracingAccelerationStructureSize Result = {};

	uint32 GPUIndex = 0;
	while (GPUIndex < GNumExplicitGPUsForRendering)
	{
		D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO GPUPrebuildInfo = {};
		Adapter.GetDevice(GPUIndex)->GetRaytracingAccelerationStructurePrebuildInfo(&TopLevelInputs, &GPUPrebuildInfo);

		// Keep the largest requirement seen so far for each size.
		Result.ResultSize = FMath::Max(Result.ResultSize, GPUPrebuildInfo.ResultDataMaxSizeInBytes);
		Result.BuildScratchSize = FMath::Max(Result.BuildScratchSize, GPUPrebuildInfo.ScratchDataSizeInBytes);
		Result.UpdateScratchSize = FMath::Max(Result.UpdateScratchSize, GPUPrebuildInfo.UpdateScratchDataSizeInBytes);

		++GPUIndex;
	}

	return Result;
}
/**
 * Derives acceleration structure build flags from a geometry initializer.
 *
 * - FastBuild or FastTrace is chosen from bFastBuild.
 * - AllowUpdate is added when bAllowUpdate is set.
 * - AllowCompaction is added only for non-fast-build, non-updatable geometry that allows compaction,
 *   when compaction is globally enabled and the primitive count exceeds the configured minimum.
 * - r.D3D12.RayTracing.DebugForceBuildMode (1 or 2) finally overrides the fast build / fast trace choice.
 */
static ERayTracingAccelerationStructureFlags GetRayTracingAccelerationStructureBuildFlags(const FRayTracingGeometryInitializer& Initializer)
{
	ERayTracingAccelerationStructureFlags Flags = Initializer.bFastBuild
		? ERayTracingAccelerationStructureFlags::FastBuild
		: ERayTracingAccelerationStructureFlags::FastTrace;

	if (Initializer.bAllowUpdate)
	{
		EnumAddFlags(Flags, ERayTracingAccelerationStructureFlags::AllowUpdate);
	}

	const bool bGeometryCanCompact = !Initializer.bFastBuild && !Initializer.bAllowUpdate && Initializer.bAllowCompaction;
	const bool bEnoughPrimitives = uint32(GD3D12RayTracingCompactionMinPrimitiveCount) < Initializer.TotalPrimitiveCount;
	if (bGeometryCanCompact && GD3D12RayTracingAllowCompaction && bEnoughPrimitives)
	{
		EnumAddFlags(Flags, ERayTracingAccelerationStructureFlags::AllowCompaction);
	}

	// Debug override of the build mode; any other value keeps the requested mode.
	switch (GRayTracingDebugForceBuildMode)
	{
	case 1:
		EnumAddFlags(Flags, ERayTracingAccelerationStructureFlags::FastBuild);
		EnumRemoveFlags(Flags, ERayTracingAccelerationStructureFlags::FastTrace);
		break;
	case 2:
		EnumAddFlags(Flags, ERayTracingAccelerationStructureFlags::FastTrace);
		EnumRemoveFlags(Flags, ERayTracingAccelerationStructureFlags::FastBuild);
		break;
	default:
		break;
	}

	return Flags;
}
void TranslateRayTracingGeometryDescs(const FRayTracingGeometryInitializer& Initializer, TArrayView<D3D12_RAYTRACING_GEOMETRY_DESC> Output)
{
check(Output.Num() == Initializer.Segments.Num());
D3D12_RAYTRACING_GEOMETRY_TYPE GeometryType = TranslateRayTracingGeometryType(Initializer.GeometryType);
uint32 ComputedPrimitiveCountForValidation = 0;
for (int32 SegmentIndex = 0; SegmentIndex < Initializer.Segments.Num(); ++SegmentIndex)
{
const FRayTracingGeometrySegment& Segment = Initializer.Segments[SegmentIndex];
checkf(Segment.VertexBuffer, TEXT("Position vertex buffer is required for ray tracing geometry."));
checkf(Segment.VertexBufferStride, TEXT("Non-zero position vertex buffer stride is required."));
checkf(Segment.VertexBufferStride % 4 == 0, TEXT("Position vertex buffer stride must be aligned to 4 bytes for ByteAddressBuffer loads to work."));
checkf(Segment.MaxVertices != 0 || Segment.NumPrimitives == 0,
TEXT("FRayTracingGeometrySegment.MaxVertices for '%s' must contain number of positions in the vertex buffer or maximum index buffer value+1 if index buffer is provided."),
*Initializer.DebugName.ToString());
if (Initializer.GeometryType == RTGT_Triangles)
{
checkf(Segment.VertexBufferElementType == VET_Float3
|| Segment.VertexBufferElementType == VET_Float4, TEXT("Only float3/4 vertex buffers are currently implemented.")); // #dxr_todo UE-72160: support other vertex buffer formats
checkf(Segment.VertexBufferStride >= 12, TEXT("Only deinterleaved float3 position vertex buffers are currently implemented.")); // #dxr_todo UE-72160: support interleaved vertex buffers
}
else if (Initializer.GeometryType == RTGT_Procedural)
{
checkf(Segment.VertexBufferStride >= (2 * sizeof(FVector3f)), TEXT("Procedural geometry vertex buffer must contain at least 2xFloat3 that defines 3D bounding boxes of primitives."));
}
if (Initializer.IndexBuffer)
{
uint32 IndexStride = Initializer.IndexBuffer->GetStride();
check(Initializer.IndexBuffer->GetSize() >=
(Segment.FirstPrimitive + Segment.NumPrimitives) * FD3D12RayTracingGeometry::IndicesPerPrimitive * IndexStride + Initializer.IndexBufferOffset);
}
D3D12_RAYTRACING_GEOMETRY_DESC Desc = {};
Desc.Flags = D3D12_RAYTRACING_GEOMETRY_FLAG_NONE;
Desc.Type = GeometryType;
if (Segment.bForceOpaque)
{
// Deny anyhit shader invocations when this segment is hit
Desc.Flags |= D3D12_RAYTRACING_GEOMETRY_FLAG_OPAQUE;
}
if (!Segment.bAllowDuplicateAnyHitShaderInvocation)
{
// Allow only a single any-hit shader invocation per primitive
Desc.Flags |= D3D12_RAYTRACING_GEOMETRY_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION;
}
switch (GeometryType)
{
case D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES:
switch (Segment.VertexBufferElementType)
{
case VET_Float4:
// While the DXGI_FORMAT_R32G32B32A32_FLOAT format is not supported by DXR, since we manually load vertex
// data when we are building the BLAS, we can just rely on the vertex stride to offset the read index,
// and read only the 3 vertex components, and so use the DXGI_FORMAT_R32G32B32_FLOAT vertex format
case VET_Float3:
Desc.Triangles.VertexFormat = DXGI_FORMAT_R32G32B32_FLOAT;
break;
case VET_Float2:
Desc.Triangles.VertexFormat = DXGI_FORMAT_R32G32_FLOAT;
break;
case VET_Half2:
Desc.Triangles.VertexFormat = DXGI_FORMAT_R16G16_FLOAT;
break;
default:
checkNoEntry();
break;
}
if (Initializer.IndexBuffer)
{
// In some cases the geometry is created with 16 bit index buffer, but it's 32 bit at build time.
// We conservatively set this to 32 bit to allocate acceleration structure memory.
Desc.Triangles.IndexFormat = DXGI_FORMAT_R32_UINT;
Desc.Triangles.IndexCount = Segment.NumPrimitives * FD3D12RayTracingGeometry::IndicesPerPrimitive;
Desc.Triangles.VertexCount = Segment.MaxVertices;
}
else
{
// Non-indexed geometry
checkf(Initializer.Segments.Num() == 1, TEXT("Non-indexed geometry with multiple segments is not implemented."));
Desc.Triangles.IndexFormat = DXGI_FORMAT_UNKNOWN;
Desc.Triangles.VertexCount = FMath::Min<uint32>(Segment.MaxVertices, Initializer.TotalPrimitiveCount * 3);
}
break;
case D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS:
Desc.AABBs.AABBCount = Segment.NumPrimitives;
break;
default:
checkf(false, TEXT("Unexpected ray tracing geometry type"));
break;
}
ComputedPrimitiveCountForValidation += Segment.NumPrimitives;
Output[SegmentIndex] = Desc;
}
check(ComputedPrimitiveCountForValidation == Initializer.TotalPrimitiveCount);
}
/**
 * Computes the memory requirements of a bottom-level acceleration structure.
 *
 * When the initializer carries a valid offline data header, only ResultSize is filled in, from the offline
 * metadata size aligned to GRHIRayTracingAccelerationStructureAlignment.
 * Otherwise prebuild info is queried on every explicit rendering GPU, for each candidate index format,
 * and the per-field maximum is returned after alignment.
 *
 * @param Initializer  Geometry description (segments, index buffer, build options, optional offline header).
 * @return Aligned result / build scratch / update scratch sizes. UpdateScratchSize is at least one alignment unit.
 */
FRayTracingAccelerationStructureSize FD3D12DynamicRHI::RHICalcRayTracingGeometrySize(const FRayTracingGeometryInitializer& Initializer)
{
	// Offline-built BVH: the size comes directly from the serialized metadata.
	if (Initializer.OfflineDataHeader.IsValid())
	{
		FRayTracingAccelerationStructureOfflineMetadata OfflineBvhMetadata = RHIGetRayTracingGeometryOfflineMetadata(Initializer.OfflineDataHeader);

		FRayTracingAccelerationStructureSize SizeInfo = {};
		SizeInfo.ResultSize = Align(OfflineBvhMetadata.Size, GRHIRayTracingAccelerationStructureAlignment);
		return SizeInfo;
	}

	FRayTracingAccelerationStructureSize SizeInfo = {};

	ERayTracingAccelerationStructureFlags BuildFlags = GetRayTracingAccelerationStructureBuildFlags(Initializer);

	D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS PrebuildDescInputs = {};

	TArray<D3D12_RAYTRACING_GEOMETRY_DESC, TInlineAllocator<32>> GeometryDescs;
	GeometryDescs.SetNumUninitialized(Initializer.Segments.Num());
	TranslateRayTracingGeometryDescs(Initializer, GeometryDescs);

	PrebuildDescInputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL;
	PrebuildDescInputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
	PrebuildDescInputs.NumDescs = GeometryDescs.Num();
	PrebuildDescInputs.pGeometryDescs = GeometryDescs.GetData();
	PrebuildDescInputs.Flags = TranslateRayTracingAccelerationStructureFlags(BuildFlags);

	FD3D12Adapter& Adapter = GetAdapter();

	// We don't know the final index buffer format, so take maximum of 16 and 32 bit.

	static const DXGI_FORMAT ValidIndexBufferFormats[] = { DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R32_UINT };
	static const DXGI_FORMAT NullIndexBufferFormats[] = { DXGI_FORMAT_UNKNOWN };

	TArrayView<const DXGI_FORMAT> IndexFormats = Initializer.IndexBuffer.IsValid()
		? MakeArrayView(ValidIndexBufferFormats)
		: MakeArrayView(NullIndexBufferFormats);

	for (DXGI_FORMAT IndexFormat : IndexFormats)
	{
		// Patch the candidate index format into every triangle desc (PrebuildDescInputs points at this array).
		for (D3D12_RAYTRACING_GEOMETRY_DESC& GeometryDesc : GeometryDescs)
		{
			if (GeometryDesc.Type == D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES)
			{
				GeometryDesc.Triangles.IndexFormat = IndexFormat;
			}
		}

		// Get maximum buffer sizes for all GPUs in the system
		for (uint32 GPUIndex = 0; GPUIndex < GNumExplicitGPUsForRendering; ++GPUIndex)
		{
			D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO PrebuildInfo = {};
			Adapter.GetDevice(GPUIndex)->GetRaytracingAccelerationStructurePrebuildInfo(&PrebuildDescInputs, &PrebuildInfo);

			SizeInfo.ResultSize = FMath::Max(SizeInfo.ResultSize, PrebuildInfo.ResultDataMaxSizeInBytes);
			SizeInfo.BuildScratchSize = FMath::Max(SizeInfo.BuildScratchSize, PrebuildInfo.ScratchDataSizeInBytes);
			SizeInfo.UpdateScratchSize = FMath::Max(SizeInfo.UpdateScratchSize, PrebuildInfo.UpdateScratchDataSizeInBytes);
		}
	}

	SizeInfo.ResultSize = Align(SizeInfo.ResultSize, GRHIRayTracingAccelerationStructureAlignment);
	SizeInfo.BuildScratchSize = Align(SizeInfo.BuildScratchSize, GRHIRayTracingScratchBufferAlignment);
	// Clamp to at least 1 byte before aligning so the update scratch size is never reported as zero.
	SizeInfo.UpdateScratchSize = Align(FMath::Max(1ULL, SizeInfo.UpdateScratchSize), GRHIRayTracingScratchBufferAlignment);

	return SizeInfo;
}
/**
 * Extracts the D3D12 BVH header stored at the start of the generic offline data header
 * and reports the sizes it contains.
 *
 * @param OfflineDataHeader Generic header whose leading bytes hold a FD3D12RayTracingOfflineBvhHeader.
 * @return Metadata carrying the header's Size and SerializedSize; SerializedOffset is always 0.
 */
FRayTracingAccelerationStructureOfflineMetadata FD3D12DynamicRHI::RHIGetRayTracingGeometryOfflineMetadata(const FRayTracingGeometryOfflineDataHeader& OfflineDataHeader)
{
	static_assert(sizeof(FD3D12RayTracingOfflineBvhHeader) <= sizeof(FRayTracingGeometryOfflineDataHeader), "FRayTracingGeometryOfflineDataHeader must be large enough to fit FOfflineBVHHeader");

	// Copy the leading bytes of the generic header into the D3D12-specific layout.
	FD3D12RayTracingOfflineBvhHeader D3D12Header;
	FMemory::Memcpy(&D3D12Header, &OfflineDataHeader, sizeof(D3D12Header));

	FRayTracingAccelerationStructureOfflineMetadata Result;
	Result.SerializedOffset = 0;
	Result.SerializedSize = D3D12Header.SerializedSize;
	Result.Size = D3D12Header.Size;
	return Result;
}
/**
 * Creates a ray tracing pipeline state from the given initializer.
 *
 * @param Initializer Description of the pipeline to create.
 * @return Reference to the new FD3D12RayTracingPipelineState.
 */
FRayTracingPipelineStateRHIRef FD3D12DynamicRHI::RHICreateRayTracingPipelineState(const FRayTracingPipelineStateInitializer& Initializer)
{
	// All pipelines are created on the first node, as they may be used on any other linked GPU.
	FD3D12Device* const FirstDevice = GetAdapter().GetDevice(0);
	return new FD3D12RayTracingPipelineState(FirstDevice, Initializer);
}
/**
 * Creates a D3D12 ray tracing geometry (BLAS) object on the current adapter.
 *
 * @param RHICmdList  Command list forwarded to the geometry constructor.
 * @param Initializer Description of the geometry to create.
 * @return Reference to the new FD3D12RayTracingGeometry.
 */
FRayTracingGeometryRHIRef FD3D12DynamicRHI::RHICreateRayTracingGeometry(FRHICommandListBase& RHICmdList, const FRayTracingGeometryInitializer& Initializer)
{
	return new FD3D12RayTracingGeometry(RHICmdList, &GetAdapter(), Initializer);
}
/**
 * Creates a D3D12 ray tracing scene object on the current adapter.
 *
 * @param Initializer Scene description; taken by value and moved into the new scene.
 * @return Reference to the new FD3D12RayTracingScene.
 */
FRayTracingSceneRHIRef FD3D12DynamicRHI::RHICreateRayTracingScene(FRayTracingSceneInitializer Initializer)
{
	TRACE_CPUPROFILER_EVENT_SCOPE(CreateRayTracingScene);

	return new FD3D12RayTracingScene(&GetAdapter(), MoveTemp(Initializer));
}
/**
 * Creates a D3D12 ray tracing shader binding table on the current adapter.
 *
 * @param RHICmdList  Command list forwarded to the shader binding table constructor.
 * @param Initializer Description of the shader binding table to create.
 * @return Reference to the new FD3D12RayTracingShaderBindingTable.
 */
FShaderBindingTableRHIRef FD3D12DynamicRHI::RHICreateShaderBindingTable(FRHICommandListBase& RHICmdList, const FRayTracingShaderBindingTableInitializer& Initializer)
{
	// Use a dedicated profiler scope name so SBT creation is not reported as scene creation in CPU traces.
	TRACE_CPUPROFILER_EVENT_SCOPE(CreateShaderBindingTable);

	FD3D12Adapter& Adapter = GetAdapter();
	return new FD3D12RayTracingShaderBindingTable(RHICmdList, &Adapter, Initializer);
}
// Definition of the static transform buffer of FD3D12RayTracingGeometry.
// The FD3D12RayTracingGeometry constructor creates it on demand (12 zeroed floats) when it is not yet valid.
FBufferRHIRef FD3D12RayTracingGeometry::NullTransformBuffer;
// Selects which set of creation parameters CreateRayTracingBuffer() uses.
enum class ERayTracingBufferType
{
// Buffer that stores an acceleration structure.
AccelerationStructure,
// Buffer used as scratch memory; created with unordered access allowed.
Scratch
};
/**
 * Allocates a buffer used either as acceleration structure storage or as scratch memory.
 *
 * @param Adapter   Adapter used to create the buffer.
 * @param GPUIndex  Index of the GPU the buffer is created for (converted to a single-GPU mask).
 * @param Size      Requested buffer size in bytes; must be non-zero.
 * @param Type      Selects acceleration structure or scratch creation parameters.
 * @param DebugName Name assigned to the buffer and reported when the size check fails.
 * @return The newly created buffer.
 */
static TRefCountPtr<FD3D12Buffer> CreateRayTracingBuffer(FD3D12Adapter* Adapter, uint32 GPUIndex, uint64 Size, ERayTracingBufferType Type, const FDebugName& DebugName)
{
	FString DebugNameString = DebugName.ToString();

	checkf(Size != 0, TEXT("Attempting to create ray tracing %s buffer of zero size. Debug name: %s"),
		Type == ERayTracingBufferType::AccelerationStructure ? TEXT("AccelerationStructure") : TEXT("Scratch"),
		*DebugNameString);

	TRefCountPtr<FD3D12Buffer> Result;

	FRHIGPUMask GPUMask = FRHIGPUMask::FromIndex(GPUIndex);

	if (Type == ERayTracingBufferType::AccelerationStructure)
	{
		const D3D12_RESOURCE_DESC ResourceDesc = CD3DX12_RESOURCE_DESC::Buffer(Size, D3D12_RESOURCE_FLAG_NONE);

		const FRHIBufferCreateDesc CreateDesc =
			FRHIBufferCreateDesc::Create(*DebugNameString, Size, 0, BUF_AccelerationStructure)
			.SetInitialState(ERHIAccess::BVHWrite)
			.SetGPUMask(GPUMask);

		// Acceleration structures stay in the dedicated acceleration structure state for their whole lifetime.
		Result = Adapter->CreateRHIBuffer(
			ResourceDesc,
			D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT,
			CreateDesc,
			ED3D12ResourceStateMode::SingleState,
			D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE
		);
	}
	else if (Type == ERayTracingBufferType::Scratch)
	{
		// Scratch doesn't need single state anymore because there are only a few scratch allocations left and allocating a
		// dedicated single state heap for it wastes memory - ideally all scratch allocations should be transient
		const D3D12_RESOURCE_DESC ResourceDesc = CD3DX12_RESOURCE_DESC::Buffer(Size, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);

		const FRHIBufferCreateDesc CreateDesc =
			FRHIBufferCreateDesc::Create(*DebugNameString, Size, 0, BUF_UnorderedAccess)
			.SetInitialState(ERHIAccess::BVHWrite)
			.SetGPUMask(GPUMask);

		Result = Adapter->CreateRHIBuffer(
			ResourceDesc,
			D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT,
			CreateDesc,
			ED3D12ResourceStateMode::Default,
			D3D12_RESOURCE_STATE_UNORDERED_ACCESS
		);

		// Elevates the scratch buffer heap priority, which may help performance / stability in low memory conditions
		// (Acceleration structure already boosted from allocation side)
		ID3D12Pageable* HeapResource = Result->GetResource()->GetPageable();
		Adapter->SetResidencyPriority(HeapResource, D3D12_RESIDENCY_PRIORITY_HIGH, GPUIndex);
	}
	else
	{
		// Every ERayTracingBufferType value must be handled above.
		checkNoEntry();
	}

	return Result;
}
/**
 * Builds a one-line, human readable summary of a geometry initializer for diagnostics.
 *
 * @param Initializer Geometry initializer to describe.
 * @return String listing the debug name, segment count, primitive count, index stride
 *         (or "NonIndexed") and whether offline data is attached.
 */
FString GetGeometryInitializerDebugString(const FRayTracingGeometryInitializer& Initializer)
{
	TStringBuilder<128> Builder;

	Builder << "DebugName=" << Initializer.DebugName.ToString()
		<< " NumSegments=" << Initializer.Segments.Num()
		<< " NumPrims=" << Initializer.TotalPrimitiveCount;

	if (!Initializer.IndexBuffer.IsValid())
	{
		Builder << " NonIndexed";
	}
	else
	{
		Builder << " IndexStride=" << Initializer.IndexBuffer->GetStride();
	}

	if (Initializer.OfflineData != nullptr)
	{
		Builder << " HasOfflineData";
	}

	return Builder.ToString();
}
// Constructs a ray tracing geometry (BLAS) from the given initializer.
//
// In order, the constructor:
//  - picks a debug name (falling back to "BLAS") and clears the per-GPU bookkeeping flags,
//  - creates the shared NullTransformBuffer if it does not exist yet,
//  - registers the geometry, translates the segments into D3D12 geometry descs and marks all GPUs dirty,
//  - computes SizeInfo either from the offline BVH metadata or via RHICalcRayTracingGeometrySize,
//  - returns early for streaming destinations (no memory is allocated for them),
//  - allocates one acceleration structure buffer per GPU and updates the memory stats,
//  - when offline data is supplied, uploads it and enqueues a command that deserializes it on each GPU;
//    otherwise only registers as rename listener on each GPU.
//
// RHICmdList    Command list used to create the null transform buffer and to enqueue the deserialize work.
// Adapter       Parent adapter; also used to allocate the buffers and the upload memory.
// InInitializer Geometry description; must contain at least one segment.
FD3D12RayTracingGeometry::FD3D12RayTracingGeometry(FRHICommandListBase& RHICmdList, FD3D12Adapter* Adapter, const FRayTracingGeometryInitializer& InInitializer)
: FRHIRayTracingGeometry(InInitializer), FD3D12AdapterChild(Adapter)
{
INC_DWORD_STAT(STAT_D3D12RayTracingAllocatedBLAS);
// Use the name from the initializer when one was provided, otherwise the generic "BLAS" name.
static const FName NAME_BLAS(TEXT("BLAS"));
DebugName = !Initializer.DebugName.IsNone() ? Initializer.DebugName : NAME_BLAS;
OwnerName = Initializer.OwnerName;
// Start with no pending compaction requests and no rename listener registrations.
FMemory::Memzero(bHasPendingCompactionRequests);
FMemory::Memzero(bRegisteredAsRenameListener);
// Create the shared null transform buffer (12 zeroed floats) the first time a geometry is constructed.
if(!FD3D12RayTracingGeometry::NullTransformBuffer.IsValid())
{
TArray<float> NullTransformData;
NullTransformData.SetNumZeroed(12);
FD3D12RayTracingGeometry::NullTransformBuffer = UE::RHIResourceUtils::CreateVertexBufferFromArray(
RHICmdList,
TEXT("NullTransformBuffer"),
EBufferUsageFlags::Static,
MakeConstArrayView(NullTransformData)
);
}
RegisterD3D12RayTracingGeometry(this);
checkf(Initializer.Segments.Num() > 0, TEXT("Ray tracing geometry must be initialized with at least one segment."));
// Build the per-segment D3D12 geometry desc templates from the initializer.
GeometryDescs.SetNumUninitialized(Initializer.Segments.Num());
TranslateRayTracingGeometryDescs(Initializer, GeometryDescs);
SetDirty(FRHIGPUMask::All(), true);
// Read the offline BVH metadata when the initializer carries a valid offline data header.
const bool bHasOfflineMetadata = Initializer.OfflineDataHeader.IsValid();
FRayTracingAccelerationStructureOfflineMetadata OfflineBvhMetadata;
if (bHasOfflineMetadata)
{
OfflineBvhMetadata = RHIGetRayTracingGeometryOfflineMetadata(InInitializer.OfflineDataHeader);
}
// SourceData stays null unless the initializer provides offline (serialized) BVH data.
const void* SourceData = nullptr;
if (Initializer.OfflineData != nullptr)
{
checkf(bHasOfflineMetadata, TEXT("OfflineData provided in Initializer has data but OfflineDataHeader is not valid."));
SourceData = Initializer.OfflineData->GetResourceData();
const uint32 SourceDataSize = Initializer.OfflineData->GetResourceDataSize();
checkf(SourceDataSize >= OfflineBvhMetadata.SerializedSize, TEXT("OfflineData provided in Initializer has %u bytes but FD3D12RayTracingGeometry expected %u bytes."), SourceDataSize, OfflineBvhMetadata.SerializedSize);
}
if (SourceData != nullptr)
{
checkf(!InInitializer.bAllowUpdate, TEXT("FD3D12RayTracingGeometry doesn't support updating BVH created using offline data."));
}
// Compute the required size of the in-memory BVH buffer
if (SourceData != nullptr)
{
// Offline data: the result size comes from the metadata and no build/update scratch memory is requested.
SizeInfo.ResultSize = OfflineBvhMetadata.Size;
SizeInfo.BuildScratchSize = 0;
SizeInfo.UpdateScratchSize = 0;
AccelerationStructureCompactedSize = OfflineBvhMetadata.Size;
}
else
{
// Get maximum buffer sizes for all GPUs in the system
SizeInfo = RHICalcRayTracingGeometrySize(Initializer);
}
checkf(SizeInfo.ResultSize != 0,
TEXT("Unexpected acceleration structure buffer size (0).\nGeometry initializer details:\n%s"),
*GetGeometryInitializerDebugString(Initializer));
// If this RayTracingGeometry is going to be used as a streaming destination,
// we don't want to allocate its memory as it will be replaced later by the streamed version,
// but we still need a correct SizeInfo as it is used to estimate its memory requirements outside of RHI.
if (Initializer.Type == ERayTracingGeometryInitializerType::StreamingDestination)
{
return;
}
// Allocate acceleration structure buffer
FOREACH_GPU(GPUIndex < MAX_NUM_GPUS && GPUIndex < GNumExplicitGPUsForRendering,
{
AccelerationStructureBuffers[GPUIndex] = CreateRayTracingBuffer(Adapter, GPUIndex, SizeInfo.ResultSize, ERayTracingBufferType::AccelerationStructure, DebugName);
AccelerationStructureBuffers[GPUIndex]->SetOwnerName(OwnerName);
INC_MEMORY_STAT_BY(STAT_D3D12RayTracingUsedVideoMemory, AccelerationStructureBuffers[GPUIndex]->GetSize());
INC_MEMORY_STAT_BY(STAT_D3D12RayTracingBLASMemory, AccelerationStructureBuffers[GPUIndex]->GetSize());
// Track the memory as dynamic or static BLAS memory depending on whether updates are allowed.
if (Initializer.bAllowUpdate)
{
INC_MEMORY_STAT_BY(STAT_D3D12RayTracingDynamicBLASMemory, AccelerationStructureBuffers[GPUIndex]->GetSize());
}
else
{
INC_MEMORY_STAT_BY(STAT_D3D12RayTracingStaticBLASMemory, AccelerationStructureBuffers[GPUIndex]->GetSize());
}
});
INC_DWORD_STAT_BY(STAT_D3D12RayTracingTrianglesBLAS, Initializer.TotalPrimitiveCount);
const bool bForRendering = Initializer.Type == ERayTracingGeometryInitializerType::Rendering;
if (SourceData != nullptr)
{
// Copy the serialized BVH into upload memory so the enqueued command can read it from the GPU.
FD3D12Device* Device = Adapter->GetDevice(0);
FD3D12ResourceLocation SrcResourceLoc(Device);
uint8* DstDataBase = (uint8*)Adapter->GetUploadHeapAllocator(0).AllocUploadResource(OfflineBvhMetadata.SerializedSize, 256, SrcResourceLoc);
FMemory::Memcpy(DstDataBase, SourceData, OfflineBvhMetadata.SerializedSize);
// NOTE(review): the lambda captures `this`; presumably the geometry is guaranteed to outlive the enqueued command - confirm.
RHICmdList.EnqueueLambda([this, SrcResourceLoc = MoveTemp(SrcResourceLoc), bForRendering](FRHICommandListBase& ExecutingCmdList)
{
FOREACH_GPU(GPUIndex < MAX_NUM_GPUS && GPUIndex < GNumExplicitGPUsForRendering,
{
FD3D12CommandContext& Context = FD3D12CommandContext::Get(ExecutingCmdList, GPUIndex);
FD3D12Buffer* AccelerationStructure = AccelerationStructureBuffers[GPUIndex];
// Deserialize the uploaded data into this GPU's acceleration structure buffer.
Context.RayTracingCommandList()->CopyRaytracingAccelerationStructure(
AccelerationStructure->ResourceLocation.GetGPUVirtualAddress(),
SrcResourceLoc.GetGPUVirtualAddress(),
D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE_DESERIALIZE
);
Context.UpdateResidency(SrcResourceLoc.GetResource());
Context.ConditionalSplitCommandList();
// Only geometries created for rendering register as rename listener and set up hit group parameters here.
if (bForRendering)
{
RegisterAsRenameListener(GPUIndex);
SetupHitGroupSystemParameters(GPUIndex);
}
});
// The acceleration structure now comes from the offline data, so it is no longer marked dirty.
SetDirty(FRHIGPUMask::All(), false);
});
// The source bytes were copied to upload memory above, so the offline data is discarded here.
Initializer.OfflineData->Discard();
}
else
{
// Offline data already registered via FD3D12RHICommandInitializeRayTracingGeometry
FOREACH_GPU(GPUIndex < MAX_NUM_GPUS && GPUIndex < GNumExplicitGPUsForRendering,
{
RegisterAsRenameListener(GPUIndex);
});
}
}
/**
 * Exchanges the acceleration structure state with another geometry and adopts its initializer.
 *
 * The per-GPU acceleration structure buffers, dirty flags and the compacted size are swapped.
 * The initializer is copied from Other (not swapped), after which the geometry descs, rename
 * listener registrations and hit group system parameters are rebuilt for this object.
 *
 * @param Other Geometry to exchange state with.
 */
void FD3D12RayTracingGeometry::Swap(FD3D12RayTracingGeometry& Other)
{
	// Exchange the per-GPU state and the compacted size.
	FOREACH_GPU(GPUIndex < MAX_NUM_GPUS,
	{
		::Swap(bIsAccelerationStructureDirty[GPUIndex], Other.bIsAccelerationStructureDirty[GPUIndex]);
		::Swap(AccelerationStructureBuffers[GPUIndex], Other.AccelerationStructureBuffers[GPUIndex]);
	});
	::Swap(AccelerationStructureCompactedSize, Other.AccelerationStructureCompactedSize);

	// Stop listening on the buffers of the current initializer before it is replaced.
	FOREACH_GPU(GPUIndex < MAX_NUM_GPUS && GPUIndex < GNumExplicitGPUsForRendering,
	{
		UnregisterAsRenameListener(GPUIndex);
	});

	Initializer = Other.Initializer;

	const bool bUseDefaultName = Initializer.DebugName.IsNone();
	DebugName = bUseDefaultName ? FName(TEXT("BLAS")) : Initializer.DebugName;

	// Rebuild the geometry desc templates for the adopted initializer.
	checkf(Initializer.Segments.Num() > 0, TEXT("Ray tracing geometry must be initialized with at least one segment."));
	GeometryDescs.SetNumUninitialized(Initializer.Segments.Num());
	TranslateRayTracingGeometryDescs(Initializer, GeometryDescs);

	// Listen on the new buffers and refresh the cached hit group parameters.
	FOREACH_GPU(GPUIndex < MAX_NUM_GPUS && GPUIndex < GNumExplicitGPUsForRendering,
	{
		RegisterAsRenameListener(GPUIndex);
		SetupHitGroupSystemParameters(GPUIndex);
	});
}
/**
 * Releases everything this geometry holds: pending compaction requests, SRVs, rename listener
 * registrations, acceleration structure buffers (with matching stat updates), the initializer,
 * geometry descs and cached hit group system parameters.
 */
void FD3D12RayTracingGeometry::ReleaseUnderlyingResource()
{
	UnregisterD3D12RayTracingGeometry(this);

	// Cancel compaction requests that have not been processed yet.
	FOREACH_GPU(GPUIndex < MAX_NUM_GPUS,
	{
		if (bHasPendingCompactionRequests[GPUIndex])
		{
			check(AccelerationStructureBuffers[GPUIndex]);
			FD3D12Device* OwningDevice = AccelerationStructureBuffers[GPUIndex]->GetParentDevice();
			bool bReleased = OwningDevice->GetRayTracingCompactionRequestHandler()->ReleaseRequest(this);
			check(bReleased);
			bHasPendingCompactionRequests[GPUIndex] = false;
		}
	});

	// Drop the SRVs and stop listening for renames of the vertex and index buffers.
	FOREACH_GPU(GPUIndex < MAX_NUM_GPUS,
	{
		HitGroupSystemIndexBufferSRV[GPUIndex].Reset();
		HitGroupSystemSegmentVertexBufferSRVs[GPUIndex].Empty();
		UnregisterAsRenameListener(GPUIndex);
	});

	// Remove the memory of every allocated acceleration structure buffer from the stats.
	for (TRefCountPtr<FD3D12Buffer>& ASBuffer : AccelerationStructureBuffers)
	{
		if (!ASBuffer.IsValid())
		{
			continue;
		}

		DEC_MEMORY_STAT_BY(STAT_D3D12RayTracingUsedVideoMemory, ASBuffer->GetSize());
		DEC_MEMORY_STAT_BY(STAT_D3D12RayTracingBLASMemory, ASBuffer->GetSize());

		const ERayTracingAccelerationStructureFlags Flags = GetRayTracingAccelerationStructureBuildFlags(Initializer);
		const bool bDynamic = EnumHasAllFlags(Flags, ERayTracingAccelerationStructureFlags::AllowUpdate);
		if (bDynamic)
		{
			DEC_MEMORY_STAT_BY(STAT_D3D12RayTracingDynamicBLASMemory, ASBuffer->GetSize());
		}
		else
		{
			DEC_MEMORY_STAT_BY(STAT_D3D12RayTracingStaticBLASMemory, ASBuffer->GetSize());
		}
	}

	// Streaming sources do not adjust the triangle / BLAS counters.
	if (Initializer.Type != ERayTracingGeometryInitializerType::StreamingSource)
	{
		DEC_DWORD_STAT_BY(STAT_D3D12RayTracingTrianglesBLAS, Initializer.TotalPrimitiveCount);
		DEC_DWORD_STAT(STAT_D3D12RayTracingAllocatedBLAS);
	}

	// Return every member to its empty state.
	for (TRefCountPtr<FD3D12Buffer>& ASBuffer : AccelerationStructureBuffers)
	{
		ASBuffer.SafeRelease();
	}

	Initializer = {};
	AccelerationStructureCompactedSize = 0;
	GeometryDescs = {};

	for (TArray<FD3D12HitGroupSystemParameters>& PerGPUParameters : HitGroupSystemParameters)
	{
		PerGPUParameters.Empty();
	}
}
// Destructor: detaches this geometry from every registered update listener, clears the listener
// list and then releases all underlying resources via ReleaseUnderlyingResource().
FD3D12RayTracingGeometry::~FD3D12RayTracingGeometry()
{
// RT geometry can be destroyed before persistent SBT records are cleared from the cached MDCs
// because they are still pending removal from the scene primitives
// NOTE(review): assumes RemoveListener() does not modify UpdateListeners while it is being iterated - confirm.
for (ID3D12RayTracingGeometryUpdateListener* UpdateListener : UpdateListeners)
{
UpdateListener->RemoveListener(this);
}
UpdateListeners.Empty();
ReleaseUnderlyingResource();
}
/**
 * Recreates the raw-buffer SRVs for the index buffer and for each segment's vertex buffer on one GPU.
 *
 * Existing SRVs for the GPU are discarded first. An index buffer SRV is only created for indexed
 * triangle geometry; one vertex buffer SRV is created per segment, in segment order.
 *
 * @param InGPUIndex GPU whose SRVs are rebuilt.
 */
void FD3D12RayTracingGeometry::AllocateBufferSRVs(uint32 InGPUIndex)
{
	HitGroupSystemSegmentVertexBufferSRVs[InGPUIndex].Empty();
	HitGroupSystemIndexBufferSRV[InGPUIndex].Reset();

	// All views created here are raw (R32 typeless) buffer views that differ only in their element range.
	const auto MakeRawViewDesc = [](uint64 FirstElement, uint32 NumElements)
	{
		D3D12_SHADER_RESOURCE_VIEW_DESC Desc = {};
		Desc.Format = DXGI_FORMAT_R32_TYPELESS;
		Desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
		Desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
		Desc.Buffer.FirstElement = FirstElement;
		Desc.Buffer.NumElements = NumElements;
		Desc.Buffer.StructureByteStride = 0;
		Desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
		return Desc;
	};

	// Procedural doesn't need any SRVs for index buffer
	if (Initializer.IndexBuffer && Initializer.GeometryType == RTGT_Triangles)
	{
		checkf((Initializer.IndexBufferOffset % RHI_RAW_VIEW_ALIGNMENT) == 0, TEXT("The byte offset of raw views must be a multiple of %d (specified offset: %d)."), RHI_RAW_VIEW_ALIGNMENT, Initializer.IndexBufferOffset);

		FD3D12Buffer* IndexBuffer = FD3D12DynamicRHI::ResourceCast(Initializer.IndexBuffer.GetReference());

		// Initializer.TotalPrimitiveCount is the accumulated num primitives of the segments.
		// The highest indexed entry can be higher due to Segment.FirstPrimitive or lower if segments overlap,
		// so the highest referenced primitive is found by walking the segments.
		uint32 HighestPrimitiveEnd = 0;
		for (const FRayTracingGeometrySegment& Segment : Initializer.Segments)
		{
			HighestPrimitiveEnd = FMath::Max(HighestPrimitiveEnd, Segment.FirstPrimitive + Segment.NumPrimitives);
		}

		const D3D12_SHADER_RESOURCE_VIEW_DESC IndexSRVDesc = MakeRawViewDesc(
			(Initializer.IndexBufferOffset + IndexBuffer->ResourceLocation.GetOffsetFromBaseOfResource()) >> 2u,
			FMath::Max((uint32)1, ((HighestPrimitiveEnd * 3 * IndexBuffer->GetStride()) + 3) >> 2u));

		HitGroupSystemIndexBufferSRV[InGPUIndex] = MakeShared<FD3D12ShaderResourceView>(GetParentAdapter()->GetDevice(InGPUIndex), InGPUIndex > 0 ? HitGroupSystemIndexBufferSRV[0].Get() : nullptr);
		HitGroupSystemIndexBufferSRV[InGPUIndex]->CreateView(IndexBuffer, IndexSRVDesc, FD3D12ShaderResourceView::EFlags::None);
	}

	for (const FRayTracingGeometrySegment& Segment : Initializer.Segments)
	{
		checkf((Segment.VertexBufferOffset % RHI_RAW_VIEW_ALIGNMENT) == 0, TEXT("The byte offset of raw views must be a multiple of %d (specified offset: %d)."), RHI_RAW_VIEW_ALIGNMENT, Segment.VertexBufferOffset);

		FD3D12Buffer* VertexBuffer = FD3D12DynamicRHI::ResourceCast(Segment.VertexBuffer.GetReference());

		// NumElements is expressed in R32 (4 byte) units.
		uint32 NumElements = 0;
		if (Initializer.GeometryType == RTGT_Procedural)
		{
			NumElements = ((Segment.NumPrimitives * Segment.VertexBufferStride) + 3) / 4;
		}
		else
		{
			NumElements = FMath::Max((uint32)1, ((Segment.MaxVertices * Segment.VertexBufferStride) + 3) / 4);
		}

		const D3D12_SHADER_RESOURCE_VIEW_DESC VertexSRVDesc = MakeRawViewDesc(
			(Segment.VertexBufferOffset + VertexBuffer->ResourceLocation.GetOffsetFromBaseOfResource()) >> 2u,
			NumElements);

		// On secondary GPUs, pass the matching view of GPU 0 (when it exists) as the first linked object.
		FD3D12ShaderResourceView* FirstLinkedObject = nullptr;
		if (InGPUIndex > 0)
		{
			const int32 NextSegmentIndex = HitGroupSystemSegmentVertexBufferSRVs[InGPUIndex].Num();
			if (NextSegmentIndex < HitGroupSystemSegmentVertexBufferSRVs[0].Num())
			{
				FirstLinkedObject = HitGroupSystemSegmentVertexBufferSRVs[0][NextSegmentIndex].Get();
			}
		}

		TSharedPtr<FD3D12ShaderResourceView> SegmentSRV = MakeShared<FD3D12ShaderResourceView>(GetParentAdapter()->GetDevice(InGPUIndex), FirstLinkedObject);
		SegmentSRV->CreateView(VertexBuffer, VertexSRVDesc, FD3D12ShaderResourceView::EFlags::None);
		HitGroupSystemSegmentVertexBufferSRVs[InGPUIndex].Add(SegmentSRV);
	}
}
/**
 * Registers this geometry as rename listener on its index buffer and on each distinct vertex buffer
 * for the given GPU. Does nothing when bindless resources are enabled for ray tracing.
 *
 * @param InGPUIndex GPU whose buffer objects are listened on; must not be registered already.
 */
void FD3D12RayTracingGeometry::RegisterAsRenameListener(uint32 InGPUIndex)
{
	// Not needed if bindless
	const bool bBindless = AreBindlessResourcesEnabledForRayTracing(GetParentAdapter());
	if (bBindless)
	{
		return;
	}

	check(!bRegisteredAsRenameListener[InGPUIndex]);

	if (FD3D12Buffer* IndexBuffer = FD3D12DynamicRHI::ResourceCast(Initializer.IndexBuffer.GetReference(), InGPUIndex))
	{
		IndexBuffer->AddRenameListener(this);
	}

	// Several segments may share one vertex buffer; register on each buffer only once.
	TArray<FD3D12Buffer*, TInlineAllocator<1>> SeenVertexBuffers;
	SeenVertexBuffers.Reserve(Initializer.Segments.Num());
	for (const FRayTracingGeometrySegment& Segment : Initializer.Segments)
	{
		FD3D12Buffer* VertexBuffer = FD3D12DynamicRHI::ResourceCast(Segment.VertexBuffer.GetReference(), InGPUIndex);
		if (!VertexBuffer || SeenVertexBuffers.Contains(VertexBuffer))
		{
			continue;
		}

		VertexBuffer->AddRenameListener(this);
		SeenVertexBuffers.Add(VertexBuffer);
	}

	bRegisteredAsRenameListener[InGPUIndex] = true;
}
/**
 * Removes this geometry as rename listener from its index buffer and from each distinct vertex
 * buffer for the given GPU. Does nothing when it is not currently registered for that GPU.
 *
 * @param InGPUIndex GPU whose buffer objects are no longer listened on.
 */
void FD3D12RayTracingGeometry::UnregisterAsRenameListener(uint32 InGPUIndex)
{
	const bool bCurrentlyRegistered = bRegisteredAsRenameListener[InGPUIndex];
	if (!bCurrentlyRegistered)
	{
		return;
	}

	// Registration never happens in bindless mode, so reaching this point implies non-bindless.
	check(!AreBindlessResourcesEnabledForRayTracing(GetParentAdapter()));

	if (FD3D12Buffer* IndexBuffer = FD3D12DynamicRHI::ResourceCast(Initializer.IndexBuffer.GetReference(), InGPUIndex))
	{
		IndexBuffer->RemoveRenameListener(this);
	}

	// Several segments may share one vertex buffer; unregister from each buffer only once.
	TArray<FD3D12Buffer*, TInlineAllocator<1>> SeenVertexBuffers;
	SeenVertexBuffers.Reserve(Initializer.Segments.Num());
	for (const FRayTracingGeometrySegment& Segment : Initializer.Segments)
	{
		FD3D12Buffer* VertexBuffer = FD3D12DynamicRHI::ResourceCast(Segment.VertexBuffer.GetReference(), InGPUIndex);
		if (!VertexBuffer || SeenVertexBuffers.Contains(VertexBuffer))
		{
			continue;
		}

		VertexBuffer->RemoveRenameListener(this);
		SeenVertexBuffers.Add(VertexBuffer);
	}

	bRegisteredAsRenameListener[InGPUIndex] = false;
}
/**
 * Rename-listener callback: rebuilds the hit group system parameters for the GPU of the renamed
 * resource, provided all index and vertex buffers of the geometry are valid on that GPU.
 *
 * @param Contexts               Not used by this implementation.
 * @param InRenamedResource      Resource that was renamed; its parent device selects the GPU.
 * @param InNewResourceLocation  New location of the resource; must not be null.
 */
void FD3D12RayTracingGeometry::ResourceRenamed(FD3D12ContextArray const& Contexts, FD3D12BaseShaderResource* InRenamedResource, FD3D12ResourceLocation* InNewResourceLocation)
{
	check(!AreBindlessResourcesEnabledForRayTracing(GetParentAdapter()));

	// Empty resource location is used on destruction of the base shader resource but this
	// shouldn't happen for RT Geometries because it keeps smart pointers to its resources.
	check(InNewResourceLocation != nullptr);

	const uint32 RenamedGPUIndex = InRenamedResource->GetParentDevice()->GetGPUIndex();
	if (!BuffersValid(RenamedGPUIndex))
	{
		return;
	}

	// The hit group parameters cache the index and vertex buffer addresses directly, so recreate them.
	SetupHitGroupSystemParameters(RenamedGPUIndex);
}
/**
 * Reports whether the index buffer (if any) and every segment's vertex buffer have a valid
 * resource location on the given GPU.
 *
 * A buffer object that cannot be resolved for the GPU (null) is treated as not valid instead of
 * being dereferenced, matching the null handling in Register/UnregisterAsRenameListener.
 *
 * @param GPUIndex GPU whose buffer objects are inspected.
 * @return true when all referenced buffers are present and have a valid resource location.
 */
bool FD3D12RayTracingGeometry::BuffersValid(uint32 GPUIndex) const
{
	if (Initializer.IndexBuffer)
	{
		const FD3D12Buffer* IndexBuffer = FD3D12DynamicRHI::ResourceCast(Initializer.IndexBuffer.GetReference(), GPUIndex);
		if (IndexBuffer == nullptr || !IndexBuffer->ResourceLocation.IsValid())
		{
			return false;
		}
	}

	for (const FRayTracingGeometrySegment& Segment : Initializer.Segments)
	{
		const FD3D12Buffer* VertexBuffer = FD3D12DynamicRHI::ResourceCast(Segment.VertexBuffer.GetReference(), GPUIndex);
		// A segment without a vertex buffer cannot be used for building, so report it as invalid.
		if (VertexBuffer == nullptr || !VertexBuffer->ResourceLocation.IsValid())
		{
			return false;
		}
	}

	return true;
}
/**
 * Updates residency on the given context for every resource this geometry references: the index
 * buffer (if any), each segment's vertex buffer and the acceleration structure buffer of the
 * context's GPU.
 *
 * @param CommandContext Context that resolves the per-GPU buffer objects and receives the updates.
 */
void FD3D12RayTracingGeometry::UpdateResidency(FD3D12CommandContext& CommandContext)
{
	if (Initializer.IndexBuffer)
	{
		FD3D12Buffer* ContextIndexBuffer = CommandContext.RetrieveObject<FD3D12Buffer>(Initializer.IndexBuffer.GetReference());
		CommandContext.UpdateResidency(ContextIndexBuffer->GetResource());
	}

	for (const FRayTracingGeometrySegment& Segment : Initializer.Segments)
	{
		FD3D12Buffer* ContextVertexBuffer = CommandContext.RetrieveObject<FD3D12Buffer>(Segment.VertexBuffer.GetReference());
		CommandContext.UpdateResidency(ContextVertexBuffer->ResourceLocation.GetResource());
	}

	CommandContext.UpdateResidency(AccelerationStructureBuffers[CommandContext.GetGPUIndex()]->GetResource());
}
/**
 * Rebuilds the cached per-segment hit group system parameters for one GPU and notifies listeners.
 *
 * In bindless mode the buffer SRVs are recreated and their bindless handle indices are stored;
 * otherwise the GPU virtual addresses of the vertex and index buffers are stored directly.
 *
 * @param InGPUIndex GPU whose parameters are rebuilt; all buffers must be valid on it.
 */
void FD3D12RayTracingGeometry::SetupHitGroupSystemParameters(uint32 InGPUIndex)
{
	const bool bUseBindless = AreBindlessResourcesEnabledForRayTracing(GetParentAdapter());
	const int32 NumSegments = Initializer.Segments.Num();

	TArray<FD3D12HitGroupSystemParameters>& PerGPUParameters = HitGroupSystemParameters[InGPUIndex];
	PerGPUParameters.Reset(NumSegments);

	check(BuffersValid(InGPUIndex));

	if (bUseBindless)
	{
		AllocateBufferSRVs(InGPUIndex);
	}

	FD3D12Buffer* IndexBuffer = FD3D12DynamicRHI::ResourceCast(Initializer.IndexBuffer.GetReference(), InGPUIndex);
	const uint32 IndexStride = IndexBuffer ? IndexBuffer->GetStride() : 0;

	// Index data is only bound for indexed triangle geometry.
	const bool bIndexedTriangles =
		IndexBuffer != nullptr
		&& TranslateRayTracingGeometryType(Initializer.GeometryType) == D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES;

	for (int32 SegmentIndex = 0; SegmentIndex < NumSegments; ++SegmentIndex)
	{
		const FRayTracingGeometrySegment& Segment = Initializer.Segments[SegmentIndex];
		FD3D12Buffer* VertexBuffer = FD3D12DynamicRHI::ResourceCast(Segment.VertexBuffer.GetReference(), InGPUIndex);

		FD3D12HitGroupSystemParameters SegmentParameters = {};
		SegmentParameters.RootConstants.SetVertexAndIndexStride(Segment.VertexBufferStride, IndexStride);

		if (!bUseBindless)
		{
			SegmentParameters.VertexBuffer = VertexBuffer->ResourceLocation.GetGPUVirtualAddress() + Segment.VertexBufferOffset;
		}
		else
		{
			SegmentParameters.BindlessHitGroupSystemVertexBuffer = HitGroupSystemSegmentVertexBufferSRVs[InGPUIndex][SegmentIndex]->GetBindlessHandle().GetIndex();
		}

		if (bIndexedTriangles)
		{
			if (!bUseBindless)
			{
				SegmentParameters.IndexBuffer = IndexBuffer->ResourceLocation.GetGPUVirtualAddress();
			}
			else
			{
				SegmentParameters.BindlessHitGroupSystemIndexBuffer = HitGroupSystemIndexBufferSRV[InGPUIndex]->GetBindlessHandle().GetIndex();
			}

			// Byte offset of the segment's first index relative to the start of the index buffer.
			SegmentParameters.RootConstants.IndexBufferOffsetInBytes = Initializer.IndexBufferOffset + IndexStride * Segment.FirstPrimitive * FD3D12RayTracingGeometry::IndicesPerPrimitive;
			SegmentParameters.RootConstants.FirstPrimitive = Segment.FirstPrimitive;
		}

		PerGPUParameters.Add(SegmentParameters);
	}

	// Notify listeners about changes
	HitGroupParamatersUpdated();
}
// Fills in the D3D12 build description for building or updating this geometry's acceleration structure
// on the GPU of the given command context.
//
// CommandContext       Provides the GPU index, the per-GPU buffer objects and the device queried for prebuild info.
// BuildMode            Update adds the PERFORM_UPDATE flag and uses the existing acceleration structure as source.
// ScratchBufferAddress GPU address written to OutDesc as the scratch buffer.
// OutDesc              Receives the complete build description (inputs, destination, scratch and source addresses).
// OutGeometryDescs     Receives one desc per segment, copied from the cached templates and patched with the current
//                      buffer addresses; must already have the same number of elements as GeometryDescs.
//                      OutDesc.Inputs points at this array's data.
void FD3D12RayTracingGeometry::CreateAccelerationStructureBuildDesc(FD3D12CommandContext& CommandContext, EAccelerationStructureBuildMode BuildMode, D3D12_GPU_VIRTUAL_ADDRESS ScratchBufferAddress, D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC& OutDesc, TArrayView<D3D12_RAYTRACING_GEOMETRY_DESC>& OutGeometryDescs) const
{
if (Initializer.IndexBuffer)
{
checkf(Initializer.IndexBuffer->GetStride() == 2 || Initializer.IndexBuffer->GetStride() == 4, TEXT("Index buffer must be 16 or 32 bit."));
}
const uint32 GPUIndex = CommandContext.GetGPUIndex();
const uint32 IndexStride = Initializer.IndexBuffer ? Initializer.IndexBuffer->GetStride() : 0;
const bool bIsUpdate = BuildMode == EAccelerationStructureBuildMode::Update;
// Use the pre-built descs as template and set the GPU resource pointers (current VB/IB).
check(OutGeometryDescs.Num() == GeometryDescs.Num());
checkf(BuffersValid(GPUIndex), TEXT("Index & vertex buffers should be valid (not streamed out) when building the acceleration structure"));
FD3D12Buffer* IndexBuffer = CommandContext.RetrieveObject<FD3D12Buffer>(Initializer.IndexBuffer.GetReference());
// NOTE(review): NullTransformBufferD3D12 is not referenced again in this function.
FD3D12Buffer* NullTransformBufferD3D12 = CommandContext.RetrieveObject<FD3D12Buffer>(NullTransformBuffer.GetReference());
// The cached hit group parameters supply the per-segment index buffer byte offsets used below.
const TArray<FD3D12HitGroupSystemParameters>& HitGroupSystemParametersForThisGPU = HitGroupSystemParameters[GPUIndex];
check(HitGroupSystemParametersForThisGPU.Num() == Initializer.Segments.Num());
ERayTracingAccelerationStructureFlags BuildFlags = GetRayTracingAccelerationStructureBuildFlags(Initializer);
D3D12_RAYTRACING_GEOMETRY_TYPE GeometryType = TranslateRayTracingGeometryType(Initializer.GeometryType);
for (int32 SegmentIndex = 0; SegmentIndex < Initializer.Segments.Num(); ++SegmentIndex)
{
D3D12_RAYTRACING_GEOMETRY_DESC& Desc = OutGeometryDescs[SegmentIndex];
Desc = GeometryDescs[SegmentIndex]; // Copy from template
const FRayTracingGeometrySegment& Segment = Initializer.Segments[SegmentIndex];
const FD3D12HitGroupSystemParameters& SystemParameters = HitGroupSystemParametersForThisGPU[SegmentIndex];
FD3D12Buffer* VertexBuffer = CommandContext.RetrieveObject<FD3D12Buffer>(Segment.VertexBuffer.GetReference());
switch (GeometryType)
{
case D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES:
// Verify that the template's vertex format matches the segment's vertex element type.
switch (Segment.VertexBufferElementType)
{
case VET_Float4:
// While the DXGI_FORMAT_R32G32B32A32_FLOAT format is not supported by DXR, since we manually load vertex
// data when we are building the BLAS, we can just rely on the vertex stride to offset the read index,
// and read only the 3 vertex components, and so use the DXGI_FORMAT_R32G32B32_FLOAT vertex format
// (intentional fall-through to the VET_Float3 check)
case VET_Float3:
check(Desc.Triangles.VertexFormat == DXGI_FORMAT_R32G32B32_FLOAT);
break;
case VET_Float2:
check(Desc.Triangles.VertexFormat == DXGI_FORMAT_R32G32_FLOAT);
break;
case VET_Half2:
check(Desc.Triangles.VertexFormat == DXGI_FORMAT_R16G16_FLOAT);
break;
default:
checkNoEntry();
break;
}
// Disabled segments stay in the desc array but contribute no indices.
if (!Segment.bEnabled)
{
Desc.Triangles.IndexCount = 0;
}
checkf(Desc.Triangles.Transform3x4 == D3D12_GPU_VIRTUAL_ADDRESS(0), TEXT("BLAS geometry transforms are not supported!"));
if (IndexBuffer)
{
check(Desc.Triangles.IndexCount <= Segment.NumPrimitives * FD3D12RayTracingGeometry::IndicesPerPrimitive);
Desc.Triangles.IndexFormat = (IndexStride == 4 ? DXGI_FORMAT_R32_UINT : DXGI_FORMAT_R16_UINT);
Desc.Triangles.IndexBuffer = IndexBuffer->ResourceLocation.GetGPUVirtualAddress() + SystemParameters.RootConstants.IndexBufferOffsetInBytes;
}
else
{
// Non-indexed geometry
checkf(Initializer.Segments.Num() == 1, TEXT("Non-indexed geometry with multiple segments is not implemented."));
check(Desc.Triangles.IndexFormat == DXGI_FORMAT_UNKNOWN);
check(Desc.Triangles.IndexCount == 0);
check(Desc.Triangles.IndexBuffer == D3D12_GPU_VIRTUAL_ADDRESS(0));
}
Desc.Triangles.VertexBuffer.StartAddress = VertexBuffer->ResourceLocation.GetGPUVirtualAddress() + Segment.VertexBufferOffset;
Desc.Triangles.VertexBuffer.StrideInBytes = Segment.VertexBufferStride;
break;
case D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS:
// For procedural geometry the segment's vertex buffer supplies the AABBs.
Desc.AABBs.AABBCount = Segment.NumPrimitives;
Desc.AABBs.AABBs.StartAddress = VertexBuffer->ResourceLocation.GetGPUVirtualAddress() + Segment.VertexBufferOffset;
Desc.AABBs.AABBs.StrideInBytes = Segment.VertexBufferStride;
break;
default:
checkf(false, TEXT("Unexpected ray tracing geometry type"));
break;
}
if (GeometryType == D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES)
{
// #dxr_todo UE-72160: support various vertex buffer layouts (fetch/decode based on vertex stride and format)
// NOTE(review): the switch above accepts VET_Float2 and VET_Half2, but this check rejects them.
checkf(Segment.VertexBufferElementType == VET_Float3 || Segment.VertexBufferElementType == VET_Float4, TEXT("Only VET_Float3 and Float4 are currently implemented and tested. Other formats will be supported in the future."));
}
}
D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS LocalBuildFlags = TranslateRayTracingAccelerationStructureFlags(BuildFlags);
if (bIsUpdate)
{
checkf(EnumHasAllFlags(BuildFlags, ERayTracingAccelerationStructureFlags::AllowUpdate),
TEXT("Acceleration structure must be created with FRayTracingGeometryInitializer::bAllowUpdate=true to perform refit / update."));
LocalBuildFlags |= D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE;
}
// Describe the bottom level build inputs using the freshly patched geometry descs.
D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS PrebuildDescInputs = {};
PrebuildDescInputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL;
PrebuildDescInputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
PrebuildDescInputs.NumDescs = OutGeometryDescs.Num();
PrebuildDescInputs.pGeometryDescs = OutGeometryDescs.GetData();
PrebuildDescInputs.Flags = LocalBuildFlags;
D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO PrebuildInfo = {};
CommandContext.GetParentDevice()->GetRaytracingAccelerationStructurePrebuildInfo(&PrebuildDescInputs, &PrebuildInfo);
// Must make sure that values computed in the constructor are valid.
check(PrebuildInfo.ResultDataMaxSizeInBytes <= SizeInfo.ResultSize);
if (bIsUpdate)
{
check(PrebuildInfo.UpdateScratchDataSizeInBytes <= SizeInfo.UpdateScratchSize);
}
else
{
check(PrebuildInfo.ScratchDataSizeInBytes <= SizeInfo.BuildScratchSize);
}
D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC BuildDesc = {};
BuildDesc.Inputs = PrebuildDescInputs;
BuildDesc.DestAccelerationStructureData = AccelerationStructureBuffers[GPUIndex]->ResourceLocation.GetGPUVirtualAddress();
BuildDesc.ScratchAccelerationStructureData = ScratchBufferAddress;
// An update uses the destination buffer as its source as well; a full build has no source.
BuildDesc.SourceAccelerationStructureData = bIsUpdate
? AccelerationStructureBuffers[GPUIndex]->ResourceLocation.GetGPUVirtualAddress()
: D3D12_GPU_VIRTUAL_ADDRESS(0);
OutDesc = BuildDesc;
}
/**
 * Decides whether an acceleration structure built with the given flags should be compacted.
 *
 * @param BuildFlags Build flags of the acceleration structure.
 * @return true when both AllowCompaction and FastTrace are set and AllowUpdate is not.
 */
static bool ShouldCompactAfterBuild(ERayTracingAccelerationStructureFlags BuildFlags)
{
	// Updatable acceleration structures are never compacted.
	if (EnumHasAnyFlags(BuildFlags, ERayTracingAccelerationStructureFlags::AllowUpdate))
	{
		return false;
	}

	return EnumHasAllFlags(BuildFlags, ERayTracingAccelerationStructureFlags::AllowCompaction | ERayTracingAccelerationStructureFlags::FastTrace);
}
// Swaps the acceleration structure buffer of this geometry on the given GPU for a freshly allocated buffer of
// InSizeAfterCompaction bytes and records a compacting copy from the old buffer into the new one.
// Memory stats and the geometry registration are refreshed around the swap.
void FD3D12RayTracingGeometry::CompactAccelerationStructure(FD3D12CommandContext& CommandContext, uint32 InGPUIndex, uint64 InSizeAfterCompaction)
{
	LLM_SCOPE_BYNAME(TEXT("FD3D12RT/CompactBLAS"));

	// A compaction request must be outstanding for this GPU; consume it.
	check(bHasPendingCompactionRequests[InGPUIndex]);
	bHasPendingCompactionRequests[InGPUIndex] = false;

	// A zero size is reported (once) and the compaction is skipped.
	if (!ensureMsgf(InSizeAfterCompaction > 0, TEXT("Compacted acceleration structure size is expected to be non-zero. This error suggests that GPU readback synchronization is broken.")))
	{
		return;
	}

	auto& ActiveBuffer = AccelerationStructureBuffers[InGPUIndex];

	// Remove the uncompacted buffer from the memory stats.
	DEC_MEMORY_STAT_BY(STAT_D3D12RayTracingUsedVideoMemory, ActiveBuffer->GetSize());
	DEC_MEMORY_STAT_BY(STAT_D3D12RayTracingBLASMemory, ActiveBuffer->GetSize());
	DEC_MEMORY_STAT_BY(STAT_D3D12RayTracingStaticBLASMemory, ActiveBuffer->GetSize());

	UnregisterD3D12RayTracingGeometry(this);

	// Hold a reference to the old buffer for the duration of this function; it is the copy source below.
	TRefCountPtr<FD3D12Buffer> SourceBuffer = ActiveBuffer;

	ActiveBuffer = CreateRayTracingBuffer(CommandContext.GetParentAdapter(), InGPUIndex, InSizeAfterCompaction, ERayTracingBufferType::AccelerationStructure, DebugName);
	ActiveBuffer->SetOwnerName(OwnerName);

	// Account for the new, compacted buffer.
	INC_MEMORY_STAT_BY(STAT_D3D12RayTracingUsedVideoMemory, ActiveBuffer->GetSize());
	INC_MEMORY_STAT_BY(STAT_D3D12RayTracingBLASMemory, ActiveBuffer->GetSize());
	INC_MEMORY_STAT_BY(STAT_D3D12RayTracingStaticBLASMemory, ActiveBuffer->GetSize());

	CommandContext.UpdateResidency(SourceBuffer->GetResource());
	CommandContext.UpdateResidency(ActiveBuffer->GetResource());

	CommandContext.RayTracingCommandList()->CopyRaytracingAccelerationStructure(
		ActiveBuffer->ResourceLocation.GetGPUVirtualAddress(),
		SourceBuffer->ResourceLocation.GetGPUVirtualAddress(),
		D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE_COMPACT);

	AccelerationStructureCompactedSize = InSizeAfterCompaction;

	RegisterD3D12RayTracingGeometry(this);
}
// Constructs a ray tracing scene from the given initializer, bumps the allocated-TLAS counter
// and caches the size requirements computed by RHICalcRayTracingSceneSize.
FD3D12RayTracingScene::FD3D12RayTracingScene(FD3D12Adapter* Adapter, FRayTracingSceneInitializer InInitializer)
	: FD3D12AdapterChild(Adapter)
	, Initializer(MoveTemp(InInitializer))
{
	INC_DWORD_STAT(STAT_D3D12RayTracingAllocatedTLAS);

	checkf(Initializer.Lifetime == RTSL_SingleFrame, TEXT("Only single-frame ray tracing scenes are currently implemented."));

	// Get maximum buffer sizes for all GPUs in the system
	SizeInfo = RHICalcRayTracingSceneSize(Initializer);
}
// Drops the per-GPU acceleration structure buffer references (updating memory stats)
// and decrements the allocated-TLAS counter.
FD3D12RayTracingScene::~FD3D12RayTracingScene()
{
	ReleaseBuffer();

	DEC_DWORD_STAT(STAT_D3D12RayTracingAllocatedTLAS);
}
void FD3D12RayTracingScene::ReleaseBuffer()
{
for (auto& AccelerationStructureBuffer : AccelerationStructureBuffers)
{
if (AccelerationStructureBuffer)
{
DEC_MEMORY_STAT_BY(STAT_D3D12RayTracingUsedVideoMemory, AccelerationStructureBuffer->GetSize());
DEC_MEMORY_STAT_BY(STAT_D3D12RayTracingTLASMemory, AccelerationStructureBuffer->GetSize());
}
AccelerationStructureBuffer = nullptr;
}
}
// Binds InBuffer (at InBufferOffset) as the acceleration structure storage for this scene on every rendering GPU.
// Any previously bound buffer is removed from the memory stats before the new one is added.
void FD3D12RayTracingScene::BindBuffer(FRHIBuffer* InBuffer, uint32 InBufferOffset)
{
	// The result must fit into the buffer past the requested offset.
	check(SizeInfo.ResultSize + InBufferOffset <= InBuffer->GetSize());

	for (uint32 GPUIndex = 0; GPUIndex < GNumExplicitGPUsForRendering; ++GPUIndex)
	{
		auto& BufferSlot = AccelerationStructureBuffers[GPUIndex];

		if (BufferSlot)
		{
			// Stop accounting for the buffer that is being replaced.
			DEC_MEMORY_STAT_BY(STAT_D3D12RayTracingUsedVideoMemory, BufferSlot->GetSize());
			DEC_MEMORY_STAT_BY(STAT_D3D12RayTracingTLASMemory, BufferSlot->GetSize());
		}

		BufferSlot = FD3D12CommandContext::RetrieveObject<FD3D12Buffer>(InBuffer, GPUIndex);

		INC_MEMORY_STAT_BY(STAT_D3D12RayTracingUsedVideoMemory, BufferSlot->GetSize());
		INC_MEMORY_STAT_BY(STAT_D3D12RayTracingTLASMemory, BufferSlot->GetSize());
	}

	BufferOffset = InBufferOffset;
}
// Prepares a top level acceleration structure build (or update) for Scene on the GPU of CommandContext and
// writes the resulting D3D12 build description to OutBuildDesc. No build command is recorded here.
//
// Steps performed:
//  - validates NumInstances and records it on the scene for non-update builds;
//  - creates a temporary scratch buffer when ScratchBuffer is null;
//  - checks the sizes cached in Scene.SizeInfo and the scratch buffer capacity against current prebuild info;
//  - updates residency for the TLAS, instance and scratch buffers and for all referenced geometries, and
//    gathers the resources the scene must keep resident when it is used;
//  - optionally dispatches the GPU validation shader for the instance buffer;
//  - fills OutBuildDesc and bumps the built/updated TLAS stat counters.
//
// ScratchBuffer / ScratchBufferOffset   - scratch memory for the build; null requests an automatic scratch buffer.
// InstanceBuffer / InstanceBufferOffset - GPU buffer holding the instance descriptors; must not be null.
// NumInstances                          - number of instances to build; must not exceed Scene.Initializer.MaxNumInstances.
// BuildMode                             - Update performs a refit of the existing structure, anything else a build.
void PrepareAccelerationStructureBuild(
FD3D12CommandContext& CommandContext,
FD3D12RayTracingScene& Scene,
FD3D12Buffer* ScratchBuffer, uint32 ScratchBufferOffset,
FD3D12Buffer* InstanceBuffer, uint32 InstanceBufferOffset,
uint32 NumInstances,
EAccelerationStructureBuildMode BuildMode,
D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC& OutBuildDesc)
{
TRACE_CPUPROFILER_EVENT_SCOPE(PrepareAccelerationStructureBuild_TopLevel);
check(InstanceBuffer != nullptr);
checkf(NumInstances <= Scene.Initializer.MaxNumInstances, TEXT("NumInstances must be less or equal to MaxNumInstances"));
const bool bIsUpdate = BuildMode == EAccelerationStructureBuildMode::Update;
// An update must use the instance count of the preceding build; a build records the count for later updates.
if (bIsUpdate)
{
checkf(NumInstances == Scene.NumInstances, TEXT("Number of instances used to update TLAS must match the number used to build."));
}
else
{
Scene.NumInstances = NumInstances;
}
const uint32 GPUIndex = CommandContext.GetGPUIndex();
FD3D12Adapter* Adapter = CommandContext.GetParentAdapter();
// When the caller supplies no scratch buffer, one is created here with the size cached in Scene.SizeInfo.
// NOTE(review): AutoScratchBuffer is released when this function returns while OutBuildDesc keeps its GPU
// address - presumably buffer destruction is deferred until the GPU is done with it; confirm.
TRefCountPtr<FD3D12Buffer> AutoScratchBuffer;
if (ScratchBuffer == nullptr)
{
const uint64 ScratchBufferSize = bIsUpdate ? Scene.SizeInfo.UpdateScratchSize : Scene.SizeInfo.BuildScratchSize;
static const FName ScratchBufferName("AutoBuildScratchTLAS");
AutoScratchBuffer = CreateRayTracingBuffer(Adapter, GPUIndex, ScratchBufferSize, ERayTracingBufferType::Scratch, ScratchBufferName);
ScratchBuffer = AutoScratchBuffer.GetReference();
ScratchBufferOffset = 0;
}
if (bIsUpdate)
{
checkf(ScratchBuffer, TEXT("TLAS update requires scratch buffer of at least %lld bytes."), Scene.SizeInfo.UpdateScratchSize);
}
else
{
checkf(ScratchBuffer, TEXT("TLAS build requires scratch buffer of at least %lld bytes."), Scene.SizeInfo.BuildScratchSize);
}
// Cross-check the sizes cached in Scene.SizeInfo, and the capacity of the scratch buffer actually used,
// against the prebuild info the device reports for this instance count and these build flags.
{
D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS BuildInputs;
BuildInputs = {};
BuildInputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL;
BuildInputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
BuildInputs.NumDescs = NumInstances;
BuildInputs.Flags = TranslateRayTracingAccelerationStructureFlags(Scene.Initializer.BuildFlags);
D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO PrebuildInfo = {};
CommandContext.GetParentDevice()->GetRaytracingAccelerationStructurePrebuildInfo(&BuildInputs, &PrebuildInfo);
checkf(PrebuildInfo.ResultDataMaxSizeInBytes <= Scene.SizeInfo.ResultSize,
TEXT("TLAS build result buffer now requires %lld bytes, but only %lld was calculated in the constructor."),
PrebuildInfo.ResultDataMaxSizeInBytes, Scene.SizeInfo.ResultSize);
checkf(PrebuildInfo.ScratchDataSizeInBytes <= Scene.SizeInfo.BuildScratchSize,
TEXT("TLAS build scratch buffer now requires %lld bytes, but only %lld was calculated in the constructor."),
PrebuildInfo.ScratchDataSizeInBytes, Scene.SizeInfo.BuildScratchSize);
checkf(PrebuildInfo.UpdateScratchDataSizeInBytes <= Scene.SizeInfo.UpdateScratchSize,
TEXT("TLAS update scratch buffer now requires %lld bytes, but only %lld was calculated in the constructor."),
PrebuildInfo.UpdateScratchDataSizeInBytes, Scene.SizeInfo.UpdateScratchSize);
// NOTE(review): the two messages below print buffer sizes/offsets with %d - confirm these arguments are 32-bit.
if (bIsUpdate)
{
checkf(ScratchBufferOffset + PrebuildInfo.UpdateScratchDataSizeInBytes <= ScratchBuffer->GetSize(),
TEXT("TLAS scratch buffer size is %d bytes with offset %d (%d bytes available), but the update requires %lld bytes (NumInstances = %d)."),
ScratchBuffer->GetSize(), ScratchBufferOffset, ScratchBuffer->GetSize() - ScratchBufferOffset,
PrebuildInfo.UpdateScratchDataSizeInBytes, NumInstances);
}
else
{
checkf(ScratchBufferOffset + PrebuildInfo.ScratchDataSizeInBytes <= ScratchBuffer->GetSize(),
TEXT("TLAS scratch buffer size is %d bytes with offset %d (%d bytes available), but the build requires %lld bytes (NumInstances = %d)."),
ScratchBuffer->GetSize(), ScratchBufferOffset, ScratchBuffer->GetSize() - ScratchBufferOffset,
PrebuildInfo.ScratchDataSizeInBytes, NumInstances);
}
}
// Make necessary resources resident
TRefCountPtr<FD3D12Buffer>& AccelerationStructureBuffer = Scene.AccelerationStructureBuffers[GPUIndex];
checkf(AccelerationStructureBuffer.IsValid(),
TEXT("Acceleration structure buffer must be set for this scene using RHIBindAccelerationStructureMemory() before build command is issued."));
CommandContext.UpdateResidency(AccelerationStructureBuffer->GetResource());
CommandContext.UpdateResidency(InstanceBuffer->GetResource());
CommandContext.UpdateResidency(ScratchBuffer->GetResource());
{
// at the same time also gather resources that need to be resident when using TLAS
TArray<const FD3D12Resource*>& ResourcesToMakeResidentForThisGPU = Scene.ResourcesToMakeResident[GPUIndex];
ResourcesToMakeResidentForThisGPU.Reset(0);
Experimental::TSherwoodSet<FD3D12ResidencyHandle*> UniqueResidencyHandles;
// Adds Resource to the scene's per-GPU residency list unless all of its initialized residency handles
// have already been seen. Compiles to an empty lambda when residency management is disabled.
auto AddResidencyHandleForResource = [&UniqueResidencyHandles, &ResourcesToMakeResidentForThisGPU] (FD3D12Resource* Resource)
{
#if ENABLE_RESIDENCY_MANAGEMENT
bool bShouldTrackResidency = false;
if (Resource->NeedsDeferredResidencyUpdate())
{
// Resources whose residency handles might change dynamically must always be tracked
bShouldTrackResidency = true;
}
else
{
// Resources that share *all* residency handles with what's already tracked don't need to be added to be tracked separately
for (FD3D12ResidencyHandle* ResidencyHandle : Resource->GetResidencyHandles())
{
if (D3DX12Residency::IsInitialized(ResidencyHandle))
{
bool bIsAlreadyInSet = false;
UniqueResidencyHandles.Add(ResidencyHandle, &bIsAlreadyInSet);
if (!bIsAlreadyInSet)
{
bShouldTrackResidency = true;
}
}
}
}
if (bShouldTrackResidency)
{
ResourcesToMakeResidentForThisGPU.Add(Resource);
}
#endif // ENABLE_RESIDENCY_MANAGEMENT
};
// Every referenced geometry must already be built on this GPU; its BLAS is made resident for the build
// and tracked for later use of the scene.
const int32 NumReferencedGeometries = Scene.ReferencedGeometries.Num();
for (int32 Index = 0; Index < NumReferencedGeometries; ++Index)
{
FD3D12RayTracingGeometry* Geometry = FD3D12DynamicRHI::ResourceCast(Scene.ReferencedGeometries[Index].GetReference());
checkf(!Geometry->IsDirty(CommandContext.GetGPUIndex()),
TEXT("Acceleration structures for all geometries must be built before building the top level acceleration structure for the scene."));
CommandContext.UpdateResidency(Geometry->AccelerationStructureBuffers[GPUIndex]->ResourceLocation.GetResource());
AddResidencyHandleForResource(Geometry->AccelerationStructureBuffers[GPUIndex]->GetResource());
// Index and vertex buffers are tracked only when ray tracing shaders or the inline ray tracing SBT are in use.
if (GRHIGlobals.RayTracing.SupportsShaders || GRHIGlobals.RayTracing.RequiresInlineRayTracingSBT)
{
checkf(Geometry->BuffersValid(CommandContext.GetGPUIndex()),
TEXT("Index & vertex buffers for all geometries must be valid (streamed in) when adding geometry to the top level acceleration structure for the scene"));
if (Geometry->Initializer.IndexBuffer)
{
FD3D12Buffer* IndexBuffer = CommandContext.RetrieveObject<FD3D12Buffer>(Geometry->Initializer.IndexBuffer.GetReference());
AddResidencyHandleForResource(IndexBuffer->GetResource());
}
for (const FRayTracingGeometrySegment& Segment : Geometry->Initializer.Segments)
{
if (Segment.VertexBuffer)
{
FD3D12Buffer* VertexBuffer = CommandContext.RetrieveObject<FD3D12Buffer>(Segment.VertexBuffer.GetReference());
AddResidencyHandleForResource(VertexBuffer->GetResource());
}
}
}
}
}
// Optional GPU-side validation of the instance buffer contents.
if (ShouldRunRayTracingGPUValidation())
{
RHI_BREADCRUMB_EVENT(CommandContext, "RTSceneValidation");
TRHICommandList_RecursiveHazardous<FD3D12CommandContext> RHICmdList(&CommandContext);
uint32 InstanceBufferStride = GRHIRayTracingInstanceDescriptorSize;
PRAGMA_DISABLE_DEPRECATION_WARNINGS
// TODO: Validation related to SBT needs to be done somewhere else since SBT is not known when in BuildAccelerationStructure
uint32 TotalHitGroupSlots = Scene.Initializer.NumTotalSegments;
PRAGMA_ENABLE_DEPRECATION_WARNINGS
FRayTracingValidateSceneBuildParamsCS::Dispatch(RHICmdList,
TotalHitGroupSlots, NumInstances,
InstanceBuffer, InstanceBufferOffset, InstanceBufferStride);
}
// Fill the output build description. For updates the source and destination are the same address (in-place).
{
const D3D12_GPU_VIRTUAL_ADDRESS BufferAddress = AccelerationStructureBuffer->ResourceLocation.GetGPUVirtualAddress() + Scene.BufferOffset;
D3D12_GPU_VIRTUAL_ADDRESS ScratchAddress = ScratchBuffer->ResourceLocation.GetGPUVirtualAddress() + ScratchBufferOffset;
checkf(BufferAddress % GRHIRayTracingAccelerationStructureAlignment == 0,
TEXT("TLAS buffer (plus offset) must be aligned to %lld bytes."),
GRHIRayTracingAccelerationStructureAlignment);
checkf(ScratchAddress % GRHIRayTracingScratchBufferAlignment == 0,
TEXT("TLAS scratch buffer (plus offset) must be aligned to %lld bytes."),
GRHIRayTracingScratchBufferAlignment);
OutBuildDesc.Inputs = {};
OutBuildDesc.Inputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL;
OutBuildDesc.Inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
OutBuildDesc.Inputs.NumDescs = NumInstances;
OutBuildDesc.Inputs.InstanceDescs = InstanceBuffer->ResourceLocation.GetGPUVirtualAddress() + InstanceBufferOffset;
OutBuildDesc.Inputs.Flags = TranslateRayTracingAccelerationStructureFlags(Scene.Initializer.BuildFlags);
if (bIsUpdate)
{
// NOTE(review): the message below names FRayTracingGeometryInitializer although the flags checked here
// come from the scene initializer - confirm the wording is intended.
checkf(EnumHasAllFlags(Scene.Initializer.BuildFlags, ERayTracingAccelerationStructureFlags::AllowUpdate),
TEXT("Acceleration structure must be created with FRayTracingGeometryInitializer::bAllowUpdate=true to perform refit / update."));
OutBuildDesc.Inputs.Flags |= D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE;
}
OutBuildDesc.DestAccelerationStructureData = BufferAddress;
OutBuildDesc.ScratchAccelerationStructureData = ScratchAddress;
OutBuildDesc.SourceAccelerationStructureData = bIsUpdate ? BufferAddress : D3D12_GPU_VIRTUAL_ADDRESS(0);
if (bIsUpdate)
{
INC_DWORD_STAT(STAT_D3D12RayTracingUpdatedTLAS);
}
else
{
INC_DWORD_STAT(STAT_D3D12RayTracingBuiltTLAS);
}
}
}
// Updates residency on CommandContext for the scene's acceleration structure buffer and for every resource
// gathered for the context's GPU. Does nothing when residency management is compiled out.
void FD3D12RayTracingScene::UpdateResidency(FD3D12CommandContext& CommandContext) const
{
#if ENABLE_RESIDENCY_MANAGEMENT
	const uint32 GPUIndex = CommandContext.GetGPUIndex();

	CommandContext.UpdateResidency(AccelerationStructureBuffers[GPUIndex]->GetResource());

	const TArray<const FD3D12Resource*>& TrackedResources = ResourcesToMakeResident[GPUIndex];
	for (int32 ResourceIndex = 0; ResourceIndex < TrackedResources.Num(); ++ResourceIndex)
	{
		CommandContext.UpdateResidency(TrackedResources[ResourceIndex]);
	}
#endif // ENABLE_RESIDENCY_MANAGEMENT
}
// Records one BuildRaytracingAccelerationStructure command per entry of BuildDescs, in order,
// without requesting any post-build info.
void FD3D12CommandContext::BuildAccelerationStructuresInternal(TConstArrayView<D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC> BuildDescs)
{
	const int32 NumBuildDescs = BuildDescs.Num();
	for (int32 DescIndex = 0; DescIndex < NumBuildDescs; ++DescIndex)
	{
		const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC* BuildDesc = &BuildDescs[DescIndex];
		GraphicsCommandList4()->BuildRaytracingAccelerationStructure(BuildDesc, 0, nullptr);
	}
}
#if WITH_MGPU
// Unregisters every geometry in Params as a rename listener on every GPU in GPUMask.
void FD3D12CommandContext::UnregisterAccelerationStructuresInternalMGPU(TConstArrayView<FRayTracingGeometryBuildParams> Params, FRHIGPUMask GPUMask)
{
	// We need to unregister rename listeners for all GPUs in a separate pass before running "RHIBuildAccelerationStructures", as the build process
	// may modify the buffer references in the ray tracing geometry. This leads to an assert where the code attempts to unregister the newer buffer
	// references on the additional GPUs, rather than the original buffer references. It's OK to unregister redundantly, as a flag is set to track
	// whether a buffer is registered, and additional unregister calls do nothing.
	const int32 NumParams = Params.Num();
	for (uint32 GPUIndex : GPUMask)
	{
		for (int32 ParamIndex = 0; ParamIndex < NumParams; ++ParamIndex)
		{
			FD3D12RayTracingGeometry* Geometry = FD3D12DynamicRHI::ResourceCast(Params[ParamIndex].Geometry.GetReference());
			Geometry->UnregisterAsRenameListener(GPUIndex);
		}
	}
}
#endif // WITH_MGPU
// Builds or updates bottom level acceleration structures for a batch of geometries on this context's GPU.
//
// Params             - one entry per geometry; may carry updated segment vertex buffer bindings and selects
//                      build vs. update through BuildMode.
// ScratchBufferRange - scratch memory shared by the whole batch. Buffer must be valid; a Size of 0 selects the
//                      full buffer size. Each geometry is given its own aligned sub-range.
//
// The function applies updated segment bindings, prepares one build description per geometry, optionally runs
// GPU validation, records the builds, queues compaction requests where applicable and finishes with a UAV barrier.
void FD3D12CommandContext::RHIBuildAccelerationStructures(TConstArrayView<FRayTracingGeometryBuildParams> Params, const FRHIBufferRange& ScratchBufferRange)
{
	TRACE_CPUPROFILER_EVENT_SCOPE(BuildAccelerationStructure_BottomLevel);
	SCOPE_CYCLE_COUNTER(STAT_D3D12BuildBLAS);
	LLM_SCOPE_BYNAME(TEXT("FD3D12RT/BLAS"));

	checkf(ScratchBufferRange.Buffer != nullptr, TEXT("BuildAccelerationStructures requires valid scratch buffer"));

	// Update geometry vertex buffers
	for (const FRayTracingGeometryBuildParams& P : Params)
	{
		FD3D12RayTracingGeometry* Geometry = FD3D12DynamicRHI::ResourceCast(P.Geometry.GetReference());

		// Listeners are re-registered in the build loop below, after the bindings have been refreshed.
		Geometry->UnregisterAsRenameListener(GetGPUIndex());

		if (P.Segments.Num())
		{
			checkf(P.Segments.Num() == Geometry->Initializer.Segments.Num(),
				TEXT("If updated segments are provided, they must exactly match existing geometry segments. Only vertex buffer bindings may change."));

			for (int32 i = 0; i < P.Segments.Num(); ++i)
			{
				checkf(P.Segments[i].MaxVertices <= Geometry->Initializer.Segments[i].MaxVertices,
					TEXT("Maximum number of vertices in a segment (%u) must not be larger than what was declared during FRHIRayTracingGeometry creation (%u), as this controls BLAS memory allocation."),
					P.Segments[i].MaxVertices, Geometry->Initializer.Segments[i].MaxVertices
				);

				Geometry->Initializer.Segments[i].VertexBuffer = P.Segments[i].VertexBuffer;
				Geometry->Initializer.Segments[i].VertexBufferElementType = P.Segments[i].VertexBufferElementType;
				Geometry->Initializer.Segments[i].VertexBufferStride = P.Segments[i].VertexBufferStride;
				Geometry->Initializer.Segments[i].VertexBufferOffset = P.Segments[i].VertexBufferOffset;
			}
		}
	}

	FlushResourceBarriers();

	const uint32 GPUIndex = GetGPUIndex();

	// Then do all work
	TArray<D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC, TInlineAllocator<32>> BuildDescs;
	BuildDescs.Reserve(Params.Num());

	// Sizes and offsets are kept 64-bit: they are combined with 64-bit scratch requirements and printed with %lld,
	// so narrowing them to 32 bits would risk truncation and mismatched format arguments.
	const uint64 ScratchBufferSize = ScratchBufferRange.Size ? uint64(ScratchBufferRange.Size) : uint64(ScratchBufferRange.Buffer->GetSize());

	checkf(ScratchBufferSize + ScratchBufferRange.Offset <= ScratchBufferRange.Buffer->GetSize(),
		TEXT("BLAS scratch buffer range size is %lld bytes with offset %lld, but the buffer only has %lld bytes. "),
		ScratchBufferSize, uint64(ScratchBufferRange.Offset), uint64(ScratchBufferRange.Buffer->GetSize()));

	// First byte past the usable scratch range. The running offset below starts at the range offset, so capacity
	// has to be checked against the end of the range rather than against its size.
	const uint64 ScratchBufferEnd = uint64(ScratchBufferRange.Offset) + ScratchBufferSize;

	const uint64 ScratchAlignment = GRHIRayTracingAccelerationStructureAlignment;
	FD3D12Buffer* ScratchBuffer = FD3D12DynamicRHI::ResourceCast(ScratchBufferRange.Buffer, GPUIndex);
	uint64 ScratchBufferOffset = ScratchBufferRange.Offset;

	UpdateResidency(ScratchBuffer->GetResource());

	// Geometry descs referenced by the build descs are allocated from the mem stack and released with this mark.
	FMemMark Mark(FMemStack::Get());

	for (int32 i = 0; i < Params.Num(); i++)
	{
		const FRayTracingGeometryBuildParams& P = Params[i];

		FD3D12RayTracingGeometry* Geometry = FD3D12DynamicRHI::ResourceCast(P.Geometry.GetReference());
		Geometry->SetDirty(GetGPUMask(), true);

		// Register as rename listener to index/vertex buffers
		Geometry->UnregisterAsRenameListener(GPUIndex);
		Geometry->RegisterAsRenameListener(GPUIndex);

		// Recreate the hit group system parameters and use them during setup of the descs
		Geometry->SetupHitGroupSystemParameters(GPUIndex);

		if (Geometry->IsDirty(GPUIndex))
		{
			const uint64 ScratchBufferRequiredSize = P.BuildMode == EAccelerationStructureBuildMode::Update ? Geometry->SizeInfo.UpdateScratchSize : Geometry->SizeInfo.BuildScratchSize;

			checkf(ScratchBufferOffset + ScratchBufferRequiredSize <= ScratchBufferEnd,
				TEXT("BLAS scratch buffer size is %lld bytes with offset %lld (%lld bytes available), but the build requires %lld bytes. "),
				ScratchBufferSize, ScratchBufferOffset,
				(ScratchBufferOffset < ScratchBufferEnd) ? (ScratchBufferEnd - ScratchBufferOffset) : uint64(0),
				ScratchBufferRequiredSize);

			D3D12_GPU_VIRTUAL_ADDRESS ScratchBufferAddress = ScratchBuffer->ResourceLocation.GetGPUVirtualAddress() + ScratchBufferOffset;

			// Advance to the next aligned sub-range for the following geometry.
			ScratchBufferOffset = Align(ScratchBufferOffset + ScratchBufferRequiredSize, ScratchAlignment);

			checkf(ScratchBufferAddress % GRHIRayTracingAccelerationStructureAlignment == 0,
				TEXT("BLAS scratch buffer (plus offset) must be aligned to %lld bytes."),
				GRHIRayTracingAccelerationStructureAlignment);

			// We need to keep D3D12_RAYTRACING_GEOMETRY_DESCs that are used in D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC alive.
			const uint32 NumGeometryDescs = Geometry->GeometryDescs.Num();
			D3D12_RAYTRACING_GEOMETRY_DESC* LocalGeometryDescsMemory = (D3D12_RAYTRACING_GEOMETRY_DESC*)FMemStack::Get().Alloc(NumGeometryDescs * sizeof(D3D12_RAYTRACING_GEOMETRY_DESC), alignof(D3D12_RAYTRACING_GEOMETRY_DESC));
			TArrayView<D3D12_RAYTRACING_GEOMETRY_DESC> LocalGeometryDescs = MakeArrayView(LocalGeometryDescsMemory, NumGeometryDescs);

			D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC& BuildDesc = BuildDescs.AddZeroed_GetRef();
			Geometry->CreateAccelerationStructureBuildDesc(*this, P.BuildMode, ScratchBufferAddress, BuildDesc, LocalGeometryDescs);

			Geometry->UpdateResidency(*this);

			if (P.BuildMode == EAccelerationStructureBuildMode::Update)
			{
				INC_DWORD_STAT(STAT_D3D12RayTracingUpdatedBLAS);
			}
			else
			{
				INC_DWORD_STAT(STAT_D3D12RayTracingBuiltBLAS);
			}
		}
	}

	if (ShouldRunRayTracingGPUValidation())
	{
		RHI_BREADCRUMB_EVENT(*this, "RTGeometryValidation");
		TRHICommandList_RecursiveHazardous<FD3D12CommandContext> RHICmdList(this);
		for (const FRayTracingGeometryBuildParams& P : Params)
		{
			FRayTracingValidateGeometryBuildParamsCS::Dispatch(RHICmdList, P);
		}
	}

	BuildAccelerationStructuresInternal(BuildDescs);

	// Queue compaction for eligible geometries and clear the dirty state set above.
	for (const FRayTracingGeometryBuildParams& P : Params)
	{
		FD3D12RayTracingGeometry* Geometry = FD3D12DynamicRHI::ResourceCast(P.Geometry.GetReference());
		if (Geometry->IsDirty(GPUIndex))
		{
			ERayTracingAccelerationStructureFlags GeometryBuildFlags = GetRayTracingAccelerationStructureBuildFlags(Geometry->Initializer);
			if (ShouldCompactAfterBuild(GeometryBuildFlags))
			{
				GetParentDevice()->GetRayTracingCompactionRequestHandler()->RequestCompact(Geometry);
				Geometry->bHasPendingCompactionRequests[GPUIndex] = true;
			}

			Geometry->SetDirty(GetGPUMask(), false);
		}
	}

	// Add a UAV barrier after each acceleration structure build batch.
	// This is required because there are currently no explicit read/write barriers
	// for acceleration structures, but we need to ensure that all commands
	// are complete before BLAS is used again on the GPU.
	AddUAVBarrier();
}
// Builds or updates top level acceleration structures for a batch of scenes.
//
// For each entry of Params the referenced geometries are appended to the scene and a build description is
// prepared. All builds are then recorded between UAV barriers, and finally each scene is marked as built.
void FD3D12CommandContext::RHIBuildAccelerationStructures(TConstArrayView<FRayTracingSceneBuildParams> Params)
{
	TRACE_CPUPROFILER_EVENT_SCOPE(BuildAccelerationStructures_TopLevel);
	SCOPE_CYCLE_COUNTER(STAT_D3D12BuildTLAS);

	TArray<D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC, TInlineAllocator<8>> BuildDescs;
	BuildDescs.Reserve(Params.Num());

	for (const FRayTracingSceneBuildParams& SceneBuildParams : Params)
	{
		FD3D12RayTracingScene* Scene = FD3D12DynamicRHI::ResourceCast(SceneBuildParams.Scene);
		FD3D12Buffer* ScratchBuffer = RetrieveObject<FD3D12Buffer>(SceneBuildParams.ScratchBuffer);
		FD3D12Buffer* InstanceBuffer = RetrieveObject<FD3D12Buffer>(SceneBuildParams.InstanceBuffer);

		// NOTE(review): geometries are appended without clearing the existing list - confirm a scene is never
		// passed through here more than once with the same geometries.
		Scene->ReferencedGeometries.Reserve(SceneBuildParams.ReferencedGeometries.Num());
		for (FRHIRayTracingGeometry* ReferencedGeometry : SceneBuildParams.ReferencedGeometries)
		{
			Scene->ReferencedGeometries.Add(ReferencedGeometry);
		}

		D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC& BuildDesc = BuildDescs.AddDefaulted_GetRef();

		PrepareAccelerationStructureBuild(
			*this,
			*Scene,
			ScratchBuffer, SceneBuildParams.ScratchBufferOffset,
			InstanceBuffer, SceneBuildParams.InstanceBufferOffset,
			SceneBuildParams.NumInstances,
			SceneBuildParams.BuildMode,
			BuildDesc
		);
	}

	// UAV barrier is used here to ensure that all bottom level acceleration structures are built
	AddUAVBarrier();
	FlushResourceBarriers();

	BuildAccelerationStructuresInternal(BuildDescs);

	// UAV barrier is used here to ensure that the acceleration structure build is complete before any rays are traced
	// #dxr_todo: these barriers should ideally be inserted by the high level code to allow more overlapped execution
	AddUAVBarrier();

	for (const FRayTracingSceneBuildParams& SceneBuildParams : Params)
	{
		FD3D12RayTracingScene* Scene = FD3D12DynamicRHI::ResourceCast(SceneBuildParams.Scene);
		Scene->bBuilt = true;

#if D3D12_RHI_SUPPORT_RAYTRACING_SCENE_DEBUGGING
		// The instance buffer is only needed by the debug update, so it is retrieved only when that path is compiled in.
		FD3D12Buffer* InstanceBuffer = RetrieveObject<FD3D12Buffer>(SceneBuildParams.InstanceBuffer);
		D3D12RayTracingSceneDebugUpdate(*Scene, InstanceBuffer, SceneBuildParams.InstanceBufferOffset, *this);
#endif // D3D12_RHI_SUPPORT_RAYTRACING_SCENE_DEBUGGING
	}
}
// Forwards the buffer binding to the D3D12 scene object behind InScene.
void FD3D12CommandContext::RHIBindAccelerationStructureMemory(FRHIRayTracingScene* InScene, FRHIBuffer* InBuffer, uint32 InBufferOffset)
{
	FD3D12DynamicRHI::ResourceCast(InScene)->BindBuffer(InBuffer, InBufferOffset);
}
// Commits the per-device shader binding table of InSBT on this context, but only when it is flagged dirty.
void FD3D12CommandContext::RHICommitShaderBindingTable(FRHIShaderBindingTable* InSBT, FRHIBuffer* InlineBindingDataBuffer)
{
	FD3D12RayTracingShaderBindingTable* SBT = FD3D12DynamicRHI::ResourceCast(InSBT);
	check(SBT);

	FD3D12RayTracingShaderBindingTableInternal* DeviceTable = SBT->GetTableForDevice(GetParentDevice());
	if (!DeviceTable->bIsDirty)
	{
		// Nothing changed since the last commit.
		return;
	}

	DeviceTable->Commit(*this, InlineBindingDataBuffer);
}
// Releases the data InSBT holds for this context's parent device.
void FD3D12CommandContext::RHIClearShaderBindingTable(FRHIShaderBindingTable* InSBT)
{
	FD3D12RayTracingShaderBindingTable* SBT = FD3D12DynamicRHI::ResourceCast(InSBT);
	check(SBT);

	FD3D12Device* ParentDevice = GetParentDevice();
	SBT->ReleaseForDevice(ParentDevice);
}
// Constant buffer slot index associated with loose shader parameters (going by the name; its users are outside this excerpt).
static constexpr uint32 LooseParameterCBVIndex = 0; // Global uniform buffer is always assumed to be in slot 0
struct FD3D12RayTracingGlobalResourceBinder
{
FD3D12RayTracingGlobalResourceBinder(FD3D12CommandContext& InCommandContext, FD3D12ExplicitDescriptorCache& InDescriptorCache)
: CommandContext(InCommandContext)
, DescriptorCache(InDescriptorCache)
{
}
void SetUniformBuffer(uint32 BaseSlotIndex, uint32 DescriptorIndex, FD3D12UniformBuffer* UniformBuffer)
{
// don't have to do anything
}
void SetRootCBV(uint32 BaseSlotIndex, uint32 DescriptorIndex, D3D12_GPU_VIRTUAL_ADDRESS Address)
{
CommandContext.GraphicsCommandList()->SetComputeRootConstantBufferView(BaseSlotIndex + DescriptorIndex, Address);
}
void SetRootSRV(uint32 BaseSlotIndex, uint32 DescriptorIndex, D3D12_GPU_VIRTUAL_ADDRESS Address)
{
CommandContext.GraphicsCommandList()->SetComputeRootShaderResourceView(BaseSlotIndex + DescriptorIndex, Address);
}
void SetRootDescriptorTable(uint32 SlotIndex, D3D12_GPU_DESCRIPTOR_HANDLE DescriptorTable)
{
CommandContext.GraphicsCommandList()->SetComputeRootDescriptorTable(SlotIndex, DescriptorTable);
}
FD3D12ConstantBufferView* SetLooseParameterData(const void* Data, uint32 DataSize, D3D12_GPU_VIRTUAL_ADDRESS& OutGPUVirtualAddress)
{
checkf(0, TEXT("Loose parameters are not implemented for global ray tracing shaders (raygen, miss, callable)"));
return nullptr;
}
void AddReferencedShaderResource(FD3D12BaseShaderResource* ShaderResource)
{
CommandContext.UpdateResidency(ShaderResource->GetResource());
}
void AddReferencedTexture(FRHITexture* RHITexture)
{
FD3D12Texture* Texture = FD3D12CommandContext::RetrieveTexture(RHITexture, CommandContext.GetGPUIndex());
CommandContext.UpdateResidency(Texture->ResourceLocation.GetResource());
}
void AddReferencedUniformBuffer(uint32 BaseSlotIndex, uint32 DescriptorIndex, FD3D12UniformBuffer* UniformBuffer)
{
CommandContext.UpdateResidency(UniformBuffer->ResourceLocation.GetResource());
}
void AddRayTracingSceneReference(FD3D12RayTracingScene* D3D12RayTracingScene)
{
D3D12RayTracingScene->UpdateResidency(CommandContext);
}
FD3D12Device* GetDevice()
{
return CommandContext.GetParentDevice();
}
#if ENABLE_RHI_VALIDATION
RHIValidation::FTracker* GetValidationTracker()
{
return CommandContext.Tracker;
}
#endif
FD3D12CommandContext& CommandContext;
FD3D12ExplicitDescriptorCache& DescriptorCache;
static constexpr uint32 WorkerIndex = 0;
};
// Resource binder for local (per shader record) ray tracing bindings.
// Root arguments are written into - or, in validation mode, compared against - the record RecordIndex of the
// shader table region selected by the ray tracing binding type. Referenced resources are forwarded to the
// shader table for tracking.
struct FD3D12RayTracingLocalResourceBinder
{
	FD3D12RayTracingLocalResourceBinder(
		FD3D12Device& InDevice,
		FD3D12RayTracingShaderBindingTableInternal& InShaderTable,
		const FD3D12RootSignature& InRootSignature,
		ERayTracingLocalShaderBindingType InBindingType,
		uint32 InRecordIndex,
		uint32 InWorkerIndex,
		ERayTracingBindingType RTBindingType)
		: Device(InDevice)
		, ShaderTable(InShaderTable)
		, DescriptorCache(*InShaderTable.DescriptorCache)
		, RootSignature(InRootSignature)
		, BindingType(InBindingType)
		, RecordIndex(InRecordIndex)
		, WorkerIndex(InWorkerIndex)
	{
		check(InShaderTable.DescriptorCache != nullptr);
		check(WorkerIndex < InShaderTable.MaxBindingWorkers);
		check(WorkerIndex < uint32(DescriptorCache.WorkerData.Num()));
		check(RecordIndex != ~0u);

		// Select the shader table region that holds the records of the requested binding type.
		if (RTBindingType == ERayTracingBindingType::CallableShader)
		{
			ShaderTableOffset = InShaderTable.CallableShaderTableOffset;
		}
		else if (RTBindingType == ERayTracingBindingType::HitGroup)
		{
			ShaderTableOffset = InShaderTable.HitGroupShaderTableOffset;
		}
		else if (RTBindingType == ERayTracingBindingType::MissShader)
		{
			ShaderTableOffset = InShaderTable.MissShaderTableOffset;
		}
		else
		{
			checkNoEntry();
		}
	}

	// Byte offset of descriptor DescriptorIndex within the root signature, counted from the bind slot
	// BaseSlotIndex in units of one GPU virtual address.
	uint32 ComputeOffsetWithinRootSignature(uint32 BaseSlotIndex, uint32 DescriptorIndex)
	{
		const uint32 SlotByteOffset = RootSignature.GetBindSlotOffsetInBytes(BaseSlotIndex);
		const uint32 BytesPerDescriptor = uint32(sizeof(D3D12_GPU_VIRTUAL_ADDRESS));
		return SlotByteOffset + DescriptorIndex * BytesPerDescriptor;
	}

	// Writes (or, in validation mode, compares) a root descriptor address in the shader record.
	void SetRootDescriptor(uint32 BaseSlotIndex, uint32 DescriptorIndex, D3D12_GPU_VIRTUAL_ADDRESS Address)
	{
		const uint32 RecordByteOffset = ComputeOffsetWithinRootSignature(BaseSlotIndex, DescriptorIndex);

		if (BindingType != ERayTracingLocalShaderBindingType::Validation)
		{
			ShaderTable.SetLocalShaderParameters(ShaderTableOffset, RecordIndex, RecordByteOffset, Address);
		}
		else
		{
			ShaderTable.CompareLocalShaderParameters(ShaderTableOffset, RecordIndex, RecordByteOffset, Address);
		}
	}

	void SetRootCBV(uint32 BaseSlotIndex, uint32 DescriptorIndex, D3D12_GPU_VIRTUAL_ADDRESS Address)
	{
		SetRootDescriptor(BaseSlotIndex, DescriptorIndex, Address);
	}

	void SetRootSRV(uint32 BaseSlotIndex, uint32 DescriptorIndex, D3D12_GPU_VIRTUAL_ADDRESS Address)
	{
		SetRootDescriptor(BaseSlotIndex, DescriptorIndex, Address);
	}

	// Writes (or, in validation mode, compares) a descriptor table handle in the shader record.
	void SetRootDescriptorTable(uint32 SlotIndex, D3D12_GPU_DESCRIPTOR_HANDLE DescriptorTable)
	{
		const uint32 RecordByteOffset = RootSignature.GetBindSlotOffsetInBytes(SlotIndex);

		if (BindingType != ERayTracingLocalShaderBindingType::Validation)
		{
			ShaderTable.SetLocalShaderParameters(ShaderTableOffset, RecordIndex, RecordByteOffset, DescriptorTable);
		}
		else
		{
			ShaderTable.CompareLocalShaderParameters(ShaderTableOffset, RecordIndex, RecordByteOffset, DescriptorTable);
		}
	}

	// Hands the loose parameter data to the shader table and remembers that this record uses loose parameters.
	FD3D12ConstantBufferView* SetLooseParameterData(const void* Data, uint32 DataSize, D3D12_GPU_VIRTUAL_ADDRESS& OutGPUVirtualAddress)
	{
		bHasLooseParameterData = true;
		return ShaderTable.SetLooseParameterData(WorkerIndex, RecordIndex, BindingType, Data, DataSize, OutGPUVirtualAddress);
	}

	void AddReferencedShaderResource(FD3D12BaseShaderResource* ShaderResource)
	{
		ShaderTable.AddReferencedShaderResource(WorkerIndex, RecordIndex, BindingType, ShaderResource);
	}

	void AddReferencedTexture(FRHITexture* RHITexture)
	{
		ShaderTable.AddReferencedTexture(WorkerIndex, RecordIndex, BindingType, RHITexture);
	}

	void AddReferencedUniformBuffer(uint32 BaseSlotIndex, uint32 DescriptorIndex, FD3D12UniformBuffer* UniformBuffer)
	{
		const uint32 RecordByteOffset = ComputeOffsetWithinRootSignature(BaseSlotIndex, DescriptorIndex);
		ShaderTable.AddReferencedUniformBuffer(WorkerIndex, RecordIndex, ShaderTableOffset, RecordByteOffset, BindingType, UniformBuffer);
	}

	// Scenes are not expected in local bindings; always asserts.
	void AddRayTracingSceneReference(FD3D12RayTracingScene* D3D12RayTracingScene)
	{
		checkf(false, TEXT("Unexpected RayTracingScene reference in local shader bindings"));
	}

	FD3D12Device* GetDevice()
	{
		return &Device;
	}

#if ENABLE_RHI_VALIDATION
	RHIValidation::FTracker* GetValidationTracker()
	{
		// We can't validate resource states in RHISetBindingsOnShaderBindingTable because there's no command context at that point, and because the states will
		// change before the raytracing command is dispatched anyway.
		return nullptr;
	}
#endif

	FD3D12Device& Device;
	FD3D12RayTracingShaderBindingTableInternal& ShaderTable;
	FD3D12ExplicitDescriptorCache& DescriptorCache;
	const FD3D12RootSignature& RootSignature;
	ERayTracingLocalShaderBindingType BindingType;
	uint32 ShaderTableOffset = 0;
	uint32 RecordIndex = ~0u;
	uint32 WorkerIndex = 0;
	bool bHasLooseParameterData = false;
	TArray<FD3D12View*, TInlineAllocator<MAX_SRVS + MAX_UAVS>> ReferencedViews;
	TArray<FD3D12Resource*, TInlineAllocator<MAX_CBS>> ReferencedCBResources;
};
/**
 * Collects every resource required by a ray tracing shader, copies the descriptors into the
 * binder's explicit descriptor cache and writes the resulting root parameters through the binder.
 *
 * @param Shader                    Shader whose ResourceCounts drive how many descriptors are bound. Must not be null.
 * @param RootSignature             Root signature used to look up the bind slots. Must not be null.
 * @param InNumBindlessParameters   Number of entries in BindlessParameters (only consumed when bindless rendering is compiled in).
 * @param BindlessParameters        Bindless resource parameters; null resources are skipped.
 * @param InNumTextures / Textures  Texture bindings, indexed by SRV slot; null entries are skipped.
 * @param InNumSRVs / SRVs          SRV bindings, indexed by SRV slot; null entries are skipped.
 * @param InNumUniformBuffers / UniformBuffers  Uniform buffer bindings, indexed by CBV slot; null entries are skipped.
 * @param InNumSamplers / Samplers  Sampler bindings, indexed by sampler slot; null entries are skipped.
 * @param InNumUAVs / UAVs          UAV bindings, indexed by UAV slot; null entries are skipped.
 * @param InLooseParameterDataSize / InLooseParameterData  Loose parameter blob, required when the shader uses the global uniform buffer.
 * @param Binder                    Receives root descriptor tables, root CBVs and resource references.
 *
 * @return true when all descriptor tables could be allocated; false when any descriptor cache allocation failed
 *         (negative table index), in which case the caller must not use the partially written bindings.
 */
template <typename ResourceBinderType>
static bool SetRayTracingShaderResources(
	const FD3D12RayTracingShader* Shader,
	const FD3D12RootSignature* RootSignature,
	uint32 InNumBindlessParameters, FRHIShaderParameterResource const* BindlessParameters,
	uint32 InNumTextures, FRHITexture* const* Textures,
	uint32 InNumSRVs, FRHIShaderResourceView* const* SRVs,
	uint32 InNumUniformBuffers, FRHIUniformBuffer* const* UniformBuffers,
	uint32 InNumSamplers, FRHISamplerState* const* Samplers,
	uint32 InNumUAVs, FRHIUnorderedAccessView* const* UAVs,
	uint32 InLooseParameterDataSize, const void* InLooseParameterData,
	ResourceBinderType& Binder)
{
	check(Shader && RootSignature);

	// Scratch state that accumulates the CPU descriptors (and their versions) per slot.
	// It also acts as the binder object for SetUniformBufferResourcesFromTables below.
	struct FBindings
	{
		FBindings(ResourceBinderType& InBinder, uint32 InGPUIndex, const FD3D12ShaderData* ShaderData)
			: Binder(InBinder)
			, GPUIndex(InGPUIndex)
#if PLATFORM_SUPPORTS_BINDLESS_RENDERING
			, bBindlessResources(EnumHasAnyFlags(ShaderData->ResourceCounts.UsageFlags, EShaderResourceUsageFlags::BindlessResources))
			, bBindlessSamplers(EnumHasAnyFlags(ShaderData->ResourceCounts.UsageFlags, EShaderResourceUsageFlags::BindlessSamplers))
#endif
		{
		}

		ResourceBinderType& Binder;
		uint32 GPUIndex;

#if PLATFORM_SUPPORTS_BINDLESS_RENDERING
		const bool bBindlessResources;
		const bool bBindlessSamplers;
#endif

#if D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
		D3D12_CPU_DESCRIPTOR_HANDLE LocalCBVs[MAX_CBS];
#endif
		D3D12_GPU_VIRTUAL_ADDRESS RemoteCBVs[MAX_CBS];
		FD3D12UniformBuffer* UniformBuffers[MAX_CBS] = {};
		D3D12_CPU_DESCRIPTOR_HANDLE LocalSRVs[MAX_SRVS];
		D3D12_CPU_DESCRIPTOR_HANDLE LocalUAVs[MAX_UAVS];
		D3D12_CPU_DESCRIPTOR_HANDLE LocalSamplers[MAX_SAMPLERS];

		// Version arrays are sized to match their descriptor handle arrays slot for slot.
#if D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
		uint32 CBVVersions[MAX_CBS];
#endif
		uint32 SRVVersions[MAX_SRVS];
		uint32 UAVVersions[MAX_UAVS];
		uint32 SamplerVersions[MAX_SAMPLERS];

		TArray<FD3D12RayTracingScene*, TInlineAllocator<1>> ReferencedRayTracingScenes;

		// One bit per slot that received a descriptor; validated against the shader's resource counts.
		uint64 BoundSRVMask = 0;
		uint64 BoundCBVMask = 0;
		uint64 BoundUAVMask = 0;
		uint64 BoundSamplerMask = 0;

		void SetUAV(FRHIUnorderedAccessView* RHIUAV, uint8 Index)
		{
			FD3D12UnorderedAccessView* UAV = FD3D12CommandContext::RetrieveObject<FD3D12UnorderedAccessView_RHI>(RHIUAV, GPUIndex);
			check(UAV != nullptr);

#if PLATFORM_SUPPORTS_BINDLESS_RENDERING
			if (!bBindlessResources)
#endif
			{
				FD3D12OfflineDescriptor Descriptor = UAV->GetOfflineCpuHandle();
				LocalUAVs[Index] = Descriptor;
				UAVVersions[Index] = Descriptor.GetVersion();
				BoundUAVMask |= 1ull << Index;
			}

			Binder.AddReferencedShaderResource(UAV->GetShaderResource());
		}

		void SetSRV(FRHIShaderResourceView* RHISRV, uint8 Index)
		{
			FD3D12ShaderResourceView* SRV = FD3D12CommandContext::RetrieveObject<FD3D12ShaderResourceView_RHI>(RHISRV, GPUIndex);
			check(SRV != nullptr);

#if PLATFORM_SUPPORTS_BINDLESS_RENDERING
			if (!bBindlessResources)
#endif
			{
				FD3D12OfflineDescriptor Descriptor = SRV->GetOfflineCpuHandle();
				LocalSRVs[Index] = Descriptor;
				SRVVersions[Index] = Descriptor.GetVersion();
				BoundSRVMask |= 1ull << Index;
			}

			Binder.AddReferencedShaderResource(SRV->GetShaderResource());

			// Scene references are collected here and reported to the binder after all tables are written.
			FD3D12RayTracingScene* ReferencedRayTracingScene = SRV->GetRayTracingScene();
			if (ReferencedRayTracingScene)
			{
				ReferencedRayTracingScenes.Add(ReferencedRayTracingScene);
			}
		}

		void SetTexture(FRHITexture* RHITexture, uint8 Index)
		{
			FD3D12ShaderResourceView* SRV = FD3D12CommandContext::RetrieveTexture(RHITexture, GPUIndex)->GetShaderResourceView();
			if (!ensure(SRV))
			{
				// Fall back to the global black texture when the texture has no SRV.
				SRV = FD3D12CommandContext::RetrieveTexture(GBlackTexture->TextureRHI, GPUIndex)->GetShaderResourceView();
			}
			check(SRV != nullptr);

#if PLATFORM_SUPPORTS_BINDLESS_RENDERING
			if (!bBindlessResources)
#endif
			{
				FD3D12OfflineDescriptor Descriptor = SRV->GetOfflineCpuHandle();
				LocalSRVs[Index] = Descriptor;
				SRVVersions[Index] = Descriptor.GetVersion();
				BoundSRVMask |= 1ull << Index;
			}

			Binder.AddReferencedTexture(RHITexture);
		}

		void SetResourceCollection(FRHIResourceCollection* ResourceCollection, uint8 Index)
		{
			FD3D12ResourceCollection* D3D12ResourceCollection = FD3D12CommandContext::RetrieveObject<FD3D12ResourceCollection>(ResourceCollection, GPUIndex);
			FD3D12ShaderResourceView* SRV = D3D12ResourceCollection ? D3D12ResourceCollection->GetShaderResourceView() : nullptr;
			check(SRV != nullptr);

#if PLATFORM_SUPPORTS_BINDLESS_RENDERING
			if (!bBindlessResources)
#endif
			{
				FD3D12OfflineDescriptor Descriptor = SRV->GetOfflineCpuHandle();
				LocalSRVs[Index] = Descriptor;
				SRVVersions[Index] = Descriptor.GetVersion();
				// Only mark the slot as bound when a descriptor was actually written (same as SetSRV / SetTexture).
				BoundSRVMask |= 1ull << Index;
			}

			Binder.AddReferencedShaderResource(SRV->GetShaderResource());
		}

		void SetSampler(FRHISamplerState* RHISampler, uint8 Index)
		{
			FD3D12SamplerState* Sampler = FD3D12CommandContext::RetrieveObject<FD3D12SamplerState>(RHISampler, GPUIndex);
			check(Sampler != nullptr);

#if PLATFORM_SUPPORTS_BINDLESS_RENDERING
			if (!bBindlessSamplers)
#endif
			{
				FD3D12OfflineDescriptor Descriptor = Sampler->OfflineDescriptor;
				LocalSamplers[Index] = Descriptor;
				SamplerVersions[Index] = Descriptor.GetVersion();
				BoundSamplerMask |= 1ull << Index;
			}
		}
	};

	FBindings Bindings(Binder, Binder.GetDevice()->GetGPUIndex(), Shader);

#if PLATFORM_SUPPORTS_BINDLESS_RENDERING
	// NOTE(review): parameter types not listed in this switch are silently ignored - confirm this is intended.
	for (uint32 BindlessParameterIndex = 0; BindlessParameterIndex < InNumBindlessParameters; ++BindlessParameterIndex)
	{
		const FRHIShaderParameterResource& ShaderParameterResource = BindlessParameters[BindlessParameterIndex];
		if (FRHIResource* Resource = ShaderParameterResource.Resource)
		{
			switch (ShaderParameterResource.Type)
			{
			case FRHIShaderParameterResource::EType::Texture:
				Bindings.SetTexture(static_cast<FRHITexture*>(Resource), BindlessParameterIndex);
				break;
			case FRHIShaderParameterResource::EType::ResourceView:
				Bindings.SetSRV(static_cast<FRHIShaderResourceView*>(Resource), BindlessParameterIndex);
				break;
			case FRHIShaderParameterResource::EType::UnorderedAccessView:
				Bindings.SetUAV(static_cast<FRHIUnorderedAccessView*>(Resource), BindlessParameterIndex);
				break;
			case FRHIShaderParameterResource::EType::Sampler:
				Bindings.SetSampler(static_cast<FRHISamplerState*>(Resource), BindlessParameterIndex);
				break;
			}
		}
	}
#endif

	for (uint32 TextureIndex = 0; TextureIndex < InNumTextures; ++TextureIndex)
	{
		FRHITexture* Resource = Textures[TextureIndex];
		if (Resource)
		{
			Bindings.SetTexture(Resource, TextureIndex);
		}
	}

	for (uint32 SRVIndex = 0; SRVIndex < InNumSRVs; ++SRVIndex)
	{
		FRHIShaderResourceView* Resource = SRVs[SRVIndex];
		if (Resource)
		{
			Bindings.SetSRV(Resource, SRVIndex);
		}
	}

	for (uint32 CBVIndex = 0; CBVIndex < InNumUniformBuffers; ++CBVIndex)
	{
		FRHIUniformBuffer* Resource = UniformBuffers[CBVIndex];
		if (Resource)
		{
			FD3D12UniformBuffer* CBV = FD3D12CommandContext::RetrieveObject<FD3D12UniformBuffer>(Resource, Bindings.GPUIndex);
#if D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
			FD3D12OfflineDescriptor Descriptor = CBV->View->GetOfflineCpuHandle();
			Bindings.LocalCBVs[CBVIndex] = Descriptor;
			Bindings.CBVVersions[CBVIndex] = Descriptor.GetVersion();
#endif // D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
			Bindings.RemoteCBVs[CBVIndex] = CBV->ResourceLocation.GetGPUVirtualAddress();
			Bindings.BoundCBVMask |= 1ull << CBVIndex;
			Bindings.UniformBuffers[CBVIndex] = CBV;
		}
	}

	for (uint32 SamplerIndex = 0; SamplerIndex < InNumSamplers; ++SamplerIndex)
	{
		FRHISamplerState* Resource = Samplers[SamplerIndex];
		if (Resource)
		{
			Bindings.SetSampler(Resource, SamplerIndex);
		}
	}

	for (uint32 UAVIndex = 0; UAVIndex < InNumUAVs; ++UAVIndex)
	{
		FRHIUnorderedAccessView* Resource = UAVs[UAVIndex];
		if (Resource)
		{
			Bindings.SetUAV(Resource, UAVIndex);
		}
	}

	// Pull the resources referenced from the uniform buffer resource tables; all buffers are treated as dirty.
	{
		uint32 DirtyUniformBuffers = ~(0u);
		UE::RHI::Private::SetUniformBufferResourcesFromTables(
			Bindings
			, *Shader
			, DirtyUniformBuffers
			, UniformBuffers
#if ENABLE_RHI_VALIDATION
			, Binder.GetValidationTracker()
#endif
		);
	}

	// Bind loose parameters
	if (Shader->UsesGlobalUniformBuffer())
	{
		checkf(InLooseParameterDataSize && InLooseParameterData, TEXT("Shader uses global uniform buffer, but the required loose parameter data is not provided."));
	}

	if (InLooseParameterData && Shader->UsesGlobalUniformBuffer())
	{
		D3D12_GPU_VIRTUAL_ADDRESS LooseParameterGPUVirtualAddress = 0;
		FD3D12ConstantBufferView* ConstantBufferView = Binder.SetLooseParameterData(InLooseParameterData, InLooseParameterDataSize, LooseParameterGPUVirtualAddress);
#if D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
		// Record the descriptor version as well, otherwise the deduplicating allocation below
		// would read an uninitialized CBVVersions entry for the loose parameter slot.
		FD3D12OfflineDescriptor LooseParameterDescriptor = ConstantBufferView->GetOfflineCpuHandle();
		Bindings.LocalCBVs[LooseParameterCBVIndex] = LooseParameterDescriptor;
		Bindings.CBVVersions[LooseParameterCBVIndex] = LooseParameterDescriptor.GetVersion();
#endif // D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
		Bindings.RemoteCBVs[LooseParameterCBVIndex] = LooseParameterGPUVirtualAddress;
		Bindings.BoundCBVMask |= 1ull << LooseParameterCBVIndex;
	}

	// Validate that all resources required by the shader are set
	auto IsCompleteBinding = [](uint32 ExpectedCount, uint64 BoundMask)
	{
		if (ExpectedCount > 64) return false; // Bound resource mask can't be represented by uint64

		// All bits of the mask [0..ExpectedCount) are expected to be set
		uint64 ExpectedMask = ExpectedCount == 64 ? ~0ull : ((1ull << ExpectedCount) - 1);
		return (ExpectedMask & BoundMask) == ExpectedMask;
	};
	check(IsCompleteBinding(Shader->ResourceCounts.NumSRVs    , Bindings.BoundSRVMask));
	check(IsCompleteBinding(Shader->ResourceCounts.NumUAVs    , Bindings.BoundUAVMask));
	check(IsCompleteBinding(Shader->ResourceCounts.NumCBs     , Bindings.BoundCBVMask));
	check(IsCompleteBinding(Shader->ResourceCounts.NumSamplers, Bindings.BoundSamplerMask));

	FD3D12ExplicitDescriptorCache& DescriptorCache = Binder.DescriptorCache;
	const uint32 WorkerIndex = Binder.WorkerIndex;

	// Bind SRVs
	const uint32 NumSRVs = Shader->ResourceCounts.NumSRVs;
	if (NumSRVs)
	{
		const int32 DescriptorTableBaseIndex = DescriptorCache.AllocateDeduplicated(Bindings.SRVVersions, Bindings.LocalSRVs, NumSRVs, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, WorkerIndex);
		if (DescriptorTableBaseIndex < 0)
		{
			return false;
		}

		const uint32 BindSlot = RootSignature->SRVRDTBindSlot(SF_Compute);
		check(BindSlot != 0xFF);

		const D3D12_GPU_DESCRIPTOR_HANDLE ResourceDescriptorTableBaseGPU = DescriptorCache.ViewHeap.GetDescriptorGPU(DescriptorTableBaseIndex);
		Binder.SetRootDescriptorTable(BindSlot, ResourceDescriptorTableBaseGPU);
	}

	// Bind UAVs
	const uint32 NumUAVs = Shader->ResourceCounts.NumUAVs;
	if (NumUAVs)
	{
		const int32 DescriptorTableBaseIndex = DescriptorCache.AllocateDeduplicated(Bindings.UAVVersions, Bindings.LocalUAVs, NumUAVs, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, WorkerIndex);
		if (DescriptorTableBaseIndex < 0)
		{
			return false;
		}

		const uint32 BindSlot = RootSignature->UAVRDTBindSlot(SF_Compute);
		check(BindSlot != 0xFF);

		const D3D12_GPU_DESCRIPTOR_HANDLE ResourceDescriptorTableBaseGPU = DescriptorCache.ViewHeap.GetDescriptorGPU(DescriptorTableBaseIndex);
		Binder.SetRootDescriptorTable(BindSlot, ResourceDescriptorTableBaseGPU);
	}

	// Bind CBVs, either through a descriptor table or as individual root CBVs
	const uint32 NumCBVs = Shader->ResourceCounts.NumCBs;
	if (NumCBVs)
	{
#if D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
		if (!EnumHasAllFlags(Shader->ResourceCounts.UsageFlags, EShaderResourceUsageFlags::BindlessResources))
		{
			// Keep the result signed: a negative index reports an allocation failure,
			// which is handled exactly like the SRV / UAV / sampler paths.
			const int32 DescriptorTableBaseIndex = DescriptorCache.AllocateDeduplicated(Bindings.CBVVersions, Bindings.LocalCBVs, NumCBVs, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, WorkerIndex);
			if (DescriptorTableBaseIndex < 0)
			{
				return false;
			}

			const uint32 BindSlot = RootSignature->CBVRDTBindSlot(SF_Compute);
			check(BindSlot != 0xFF);

			const D3D12_GPU_DESCRIPTOR_HANDLE ResourceDescriptorTableBaseGPU = DescriptorCache.ViewHeap.GetDescriptorGPU(DescriptorTableBaseIndex);
			Binder.SetRootDescriptorTable(BindSlot, ResourceDescriptorTableBaseGPU);
		}
		else
#endif // D3D12RHI_USE_CONSTANT_BUFFER_VIEWS
		{
			checkf(RootSignature->CBVRDTBindSlot(SF_Compute) == 0xFF, TEXT("Root CBV descriptor tables are not implemented for ray tracing shaders."));

			const uint32 BindSlot = RootSignature->CBVRDBaseBindSlot(SF_Compute);
			check(BindSlot != 0xFF);

			for (uint32 i = 0; i < NumCBVs; ++i)
			{
				// Unbound slots are written with a null GPU address.
				const uint64 SlotMask = (1ull << i);
				D3D12_GPU_VIRTUAL_ADDRESS BufferAddress = (Bindings.BoundCBVMask & SlotMask) ? Bindings.RemoteCBVs[i] : 0;
				Binder.SetRootCBV(BindSlot, i, BufferAddress);

				// Also set the uniform buffer in case it's valid so it can be notified about changes when it's a persistent binding
				if (Bindings.UniformBuffers[i])
				{
					Binder.AddReferencedUniformBuffer(BindSlot, i, Bindings.UniformBuffers[i]);
				}
			}
		}
	}

	// Bind samplers
	const uint32 NumSamplers = Shader->ResourceCounts.NumSamplers;
	if (NumSamplers)
	{
		const int32 DescriptorTableBaseIndex = DescriptorCache.AllocateDeduplicated(Bindings.SamplerVersions, Bindings.LocalSamplers, NumSamplers, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, WorkerIndex);
		if (DescriptorTableBaseIndex < 0)
		{
			return false;
		}

		const uint32 BindSlot = RootSignature->SamplerRDTBindSlot(SF_Compute);
		check(BindSlot != 0xFF);

		const D3D12_GPU_DESCRIPTOR_HANDLE ResourceDescriptorTableBaseGPU = DescriptorCache.SamplerHeap.GetDescriptorGPU(DescriptorTableBaseIndex);
		Binder.SetRootDescriptorTable(BindSlot, ResourceDescriptorTableBaseGPU);
	}

	// Report the ray tracing scenes that were referenced through SRVs
	for (FD3D12RayTracingScene* RayTracingScene : Bindings.ReferencedRayTracingScenes)
	{
		Binder.AddRayTracingSceneReference(RayTracingScene);
	}

	return true;
}
template <typename ResourceBinderType>
static bool SetRayTracingShaderResources(
const FD3D12RayTracingShader* Shader,
const FD3D12RootSignature* RootSignature,
const FRayTracingShaderBindings& ResourceBindings,
ResourceBinderType& Binder)
{
static_assert(
sizeof(ResourceBindings.SRVs) / sizeof(*ResourceBindings.SRVs) == MAX_SRVS,
"Ray Tracing Shader Bindings SRV array size must match D3D12 RHI Limit");
static_assert(
sizeof(ResourceBindings.UniformBuffers) / sizeof(*ResourceBindings.UniformBuffers) == MAX_CBS,
"Ray Tracing Shader Bindings Uniform Buffer array size must match D3D12 RHI Limit");
static_assert(
sizeof(ResourceBindings.Samplers) / sizeof(*ResourceBindings.Samplers) == MAX_SAMPLERS,
"Ray Tracing Shader Bindings Sampler array size must match D3D12 RHI Limit");
static_assert(
sizeof(ResourceBindings.UAVs) / sizeof(*ResourceBindings.UAVs) == MAX_UAVS,
"Ray Tracing Shader Bindings UAV array size must match D3D12 RHI Limit");
return SetRayTracingShaderResources(
Shader,
RootSignature,
ResourceBindings.BindlessParameters.Num(), ResourceBindings.BindlessParameters.GetData(),
UE_ARRAY_COUNT(ResourceBindings.Textures), ResourceBindings.Textures,
UE_ARRAY_COUNT(ResourceBindings.SRVs), ResourceBindings.SRVs,
UE_ARRAY_COUNT(ResourceBindings.UniformBuffers), ResourceBindings.UniformBuffers,
UE_ARRAY_COUNT(ResourceBindings.Samplers), ResourceBindings.Samplers,
UE_ARRAY_COUNT(ResourceBindings.UAVs), ResourceBindings.UAVs,
0, nullptr, // loose parameters
Binder);
}
// Records a ray tracing dispatch (direct or indirect) on the given command context.
//
// CommandContext     Context that records the barriers, copies, root bindings and the dispatch itself.
// GlobalBindings     Global resources bound for the ray generation shader.
// Pipeline           Ray tracing pipeline that provides the ray generation shader and the state object(s).
// RayGenShaderIndex  Index into Pipeline->RayGenShaders (and Pipeline->SpecializationIndices).
// OptShaderTable     Optional shader table; when it is set and owns a descriptor cache that cache is used,
//                    otherwise a transient descriptor cache is created for this dispatch.
// DispatchDesc       Dispatch description; for indirect dispatches only the part before Width is copied from it.
// QueueType          Selects the per-queue dispatch rays description buffer used for indirect dispatches.
// ArgumentBuffer     When non-null the dispatch is indirect and the dimensions are copied from this buffer.
// ArgumentOffset     Offset of the dimensions inside ArgumentBuffer.
//
// The dispatch is only issued when the global resources could be bound; the explicit descriptor cache is
// unset on the command context in either case.
static void DispatchRays(FD3D12CommandContext& CommandContext,
const FRayTracingShaderBindings& GlobalBindings,
const FD3D12RayTracingPipelineState* Pipeline,
uint32 RayGenShaderIndex,
FD3D12RayTracingShaderBindingTableInternal* OptShaderTable,
const D3D12_DISPATCH_RAYS_DESC& DispatchDesc,
ED3D12QueueType QueueType,
FD3D12Buffer* ArgumentBuffer = nullptr, uint32 ArgumentOffset = 0)
{
SCOPE_CYCLE_COUNTER(STAT_D3D12DispatchRays);
// TODO: add optional validation that all (used/valid) shader identifiers used in the SBT are also available in the RTPSO
FD3D12Device* Device = CommandContext.GetParentDevice();
FD3D12Adapter* Adapter = Device->GetParentAdapter();
// Stays null for direct dispatches; a non-null value selects ExecuteIndirect further below.
FD3D12Buffer* DispatchRaysDescBuffer = nullptr;
if (ArgumentBuffer)
{
// Source indirect argument buffer only contains the dispatch dimensions, however D3D12 requires a full D3D12_DISPATCH_RAYS_DESC structure.
// We create a new buffer, fill the SBT pointers on CPU and copy the dispatch dimensions into the right place.
DispatchRaysDescBuffer = Device->GetRayTracingDispatchRaysDescBuffer(QueueType);
FD3D12Resource* DispatchRaysDescBufferResource = DispatchRaysDescBuffer->GetResource();
// Move both buffers into copy states before recording the copies.
CommandContext.TransitionResource(DispatchRaysDescBufferResource, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_RESOURCE_STATE_COPY_DEST, 0);
CommandContext.TransitionResource(ArgumentBuffer->GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE|D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_RESOURCE_STATE_COPY_SOURCE, 0);
CommandContext.FlushResourceBarriers();
// Compute the allocation & copy sizes
uint32 DispatchRayDescSize = sizeof(D3D12_DISPATCH_RAYS_DESC);
// Everything in front of the Width member is the SBT part that is filled on the CPU.
uint32 SBTPartSize = offsetof(D3D12_DISPATCH_RAYS_DESC, Width);
uint32 IndirectDimensionSize = DispatchRayDescSize - SBTPartSize;
static_assert((sizeof(D3D12_DISPATCH_RAYS_DESC) - offsetof(D3D12_DISPATCH_RAYS_DESC, Width)) == sizeof(uint32) * 4, "Assume 4 uints at the end of the struct to store the dimension + alignment overhead");
uint32 BaseRayDescBufferOffset = DispatchRaysDescBuffer->ResourceLocation.GetOffsetFromBaseOfResource();
// Copy SBT data part of the dispatch desc to upload memory
FD3D12ResourceLocation UploadResourceLocation(Device);
void* Data = Device->GetDefaultFastAllocator().Allocate(DispatchRayDescSize, 256, &UploadResourceLocation);
FMemory::Memcpy(Data, &DispatchDesc, SBTPartSize);
// Copy SBT data part to resource
CommandContext.CopyBufferRegionChecked(
DispatchRaysDescBufferResource->GetResource(), DispatchRaysDescBufferResource->GetName(),
BaseRayDescBufferOffset,
UploadResourceLocation.GetResource()->GetResource(), UploadResourceLocation.GetResource()->GetName(),
UploadResourceLocation.GetOffsetFromBaseOfResource(),
SBTPartSize
);
// Copy GPU computed indirect args to resource
CommandContext.CopyBufferRegionChecked(
DispatchRaysDescBufferResource->GetResource(), DispatchRaysDescBufferResource->GetName(),
BaseRayDescBufferOffset + SBTPartSize,
ArgumentBuffer->GetResource()->GetResource(), ArgumentBuffer->GetResource()->GetName(),
ArgumentBuffer->ResourceLocation.GetOffsetFromBaseOfResource() + ArgumentOffset,
IndirectDimensionSize
);
// Return both buffers to the states they were transitioned from above.
CommandContext.TransitionResource(
DispatchRaysDescBufferResource,
D3D12_RESOURCE_STATE_COPY_DEST,
D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
0
);
CommandContext.TransitionResource(ArgumentBuffer->GetResource(),
D3D12_RESOURCE_STATE_COPY_SOURCE,
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
0
);
CommandContext.FlushResourceBarriers();
}
// Setup state for RT dispatch
// Invalidate state cache to ensure all root parameters for regular shaders are reset when non-RT work is dispatched later.
CommandContext.StateCache.TransitionComputeState(ED3D12PipelineType::RayTracing);
// NOTE(review): the returned command list is discarded here - presumably called for its side effect; confirm.
CommandContext.GraphicsCommandList();
FD3D12RayTracingShader* RayGenShader = Pipeline->RayGenShaders.Shaders[RayGenShaderIndex];
// The ray generation shader must have been compiled against the binding layout active on the context.
const FRHIShaderBindingLayout& ShaderBindingLayout = CommandContext.GetShaderBindingLayout();
check(RayGenShader->ShaderBindingLayoutHash == ShaderBindingLayout.GetHash());
const TArray<FRHIUniformBuffer*>& StaticUniformBuffers = CommandContext.GetStaticUniformBuffers();
const FD3D12RootSignature* GlobalRTRootSignature = Adapter->GetGlobalRayTracingRootSignature(ShaderBindingLayout);
bool bResourcesBound = false;
if (OptShaderTable && OptShaderTable->DescriptorCache)
{
// Use the descriptor cache owned by the shader table; binding is serialized through the table's DispatchMutex.
FD3D12ExplicitDescriptorCache* DescriptorCache = OptShaderTable->DescriptorCache;
check(DescriptorCache != nullptr);
UE::TScopeLock Lock(OptShaderTable->DispatchMutex);
TRACE_CPUPROFILER_EVENT_SCOPE(SetRayTracingShaderResources);
CommandContext.SetExplicitDescriptorCache(*DescriptorCache);
CommandContext.GraphicsCommandList()->SetComputeRootSignature(Pipeline->GlobalRootSignature);
FD3D12RayTracingGlobalResourceBinder ResourceBinder(CommandContext, *DescriptorCache);
bResourcesBound = SetRayTracingShaderResources(RayGenShader, GlobalRTRootSignature, GlobalBindings, ResourceBinder);
OptShaderTable->UpdateResidency(CommandContext);
}
else
{
// No shader table descriptor cache available: create a transient cache just large enough for the global bindings.
FD3D12ExplicitDescriptorCache TransientDescriptorCache(CommandContext.GetParentDevice(), FD3D12RayTracingShaderBindingTableInternal::MaxBindingWorkers);
TransientDescriptorCache.Init(0, MAX_SRVS + MAX_UAVS, MAX_SAMPLERS, ERHIBindlessConfiguration::RayTracing);
CommandContext.SetExplicitDescriptorCache(TransientDescriptorCache);
CommandContext.GraphicsCommandList()->SetComputeRootSignature(Pipeline->GlobalRootSignature);
FD3D12RayTracingGlobalResourceBinder ResourceBinder(CommandContext, TransientDescriptorCache);
bResourcesBound = SetRayTracingShaderResources(RayGenShader, GlobalRTRootSignature, GlobalBindings, ResourceBinder);
}
// Bind diagnostic buffer to allow asserts in ray generation shaders
CommandContext.BindDiagnosticBuffer(GlobalRTRootSignature, ED3D12PipelineType::Compute);
// Bind the static uniform buffers of the shader binding layout as root CBVs (a negative slot skips this step).
int8 StaticShaderBindingSlot = GlobalRTRootSignature->GetStaticShaderBindingSlot();
if (StaticShaderBindingSlot >= 0)
{
for (uint32 Index = 0; Index < ShaderBindingLayout.GetNumUniformBufferEntries(); ++Index)
{
const FRHIUniformBufferShaderBindingLayout& LayoutEntry = ShaderBindingLayout.GetUniformBufferEntry(Index);
const uint32 RootParameterSlotIndex = uint32(StaticShaderBindingSlot) + LayoutEntry.CBVResourceIndex;
FRHIUniformBuffer* UniformBuffer = StaticUniformBuffers[Index];
checkf(UniformBuffer, TEXT("Static uniform buffer at index %d is referenced in the shader binding layout but not provided in the last RHISetStaticUniformBuffers() command"), Index);
FD3D12UniformBuffer* D3D12UniformBuffer = FD3D12CommandContext::RetrieveObject<FD3D12UniformBuffer>(UniformBuffer, Device->GetGPUIndex());
// Buffers without a GPU virtual address are skipped.
if (D3D12UniformBuffer->ResourceLocation.GetGPUVirtualAddress())
{
const FD3D12ResourceLocation& ResourceLocation = D3D12UniformBuffer->ResourceLocation;
CommandContext.GraphicsCommandList()->SetComputeRootConstantBufferView(RootParameterSlotIndex, ResourceLocation.GetGPUVirtualAddress());
}
}
}
// The dispatch is skipped entirely when the global resources could not be bound.
if (bResourcesBound)
{
CommandContext.FlushResourceBarriers();
ID3D12StateObject* RayTracingStateObject = nullptr;
// Select a specialized RTPSO, if one is available
if (GRayTracingAllowSpecializedStateObjects
&& !Pipeline->SpecializedStateObjects.IsEmpty()
&& !Pipeline->SpecializationIndices.IsEmpty())
{
int32 SpecializationIndex = Pipeline->SpecializationIndices[RayGenShaderIndex];
if (SpecializationIndex != INDEX_NONE)
{
RayTracingStateObject = Pipeline->SpecializedStateObjects[SpecializationIndex];
}
}
// Fall back to default full RTPSO if specialization is not available
if (!RayTracingStateObject)
{
RayTracingStateObject = Pipeline->StateObject.GetReference();
}
// Stamp the pipeline with the current frame fence counter.
Pipeline->FrameCounter.Set(CommandContext.GetFrameFenceCounter());
CommandContext.RayTracingCommandList()->SetPipelineState1(RayTracingStateObject);
if (DispatchRaysDescBuffer)
{
// Indirect dispatch: a single command reading the description assembled on the GPU above.
ID3D12CommandSignature* CommandSignature = Adapter->GetDispatchRaysIndirectCommandSignature();
CommandContext.RayTracingCommandList()->ExecuteIndirect(
CommandSignature,
1,
DispatchRaysDescBuffer->ResourceLocation.GetResource()->GetResource(),
DispatchRaysDescBuffer->ResourceLocation.GetOffsetFromBaseOfResource(),
nullptr,
0
);
}
else
{
CommandContext.RayTracingCommandList()->DispatchRays(&DispatchDesc);
}
#if (RHI_NEW_GPU_PROFILER == 0)
if (CommandContext.IsDefaultContext())
{
CommandContext.GetParentDevice()->RegisterGPUWork(1);
}
#endif
}
// Restore old global descriptor heaps
CommandContext.UnsetExplicitDescriptorCache();
}
/**
 * Direct ray tracing dispatch of Width x Height rays (depth is always 1).
 * The shader binding table must have been committed and the ray generation shader must be part of the pipeline.
 */
void FD3D12CommandContext::RHIRayTraceDispatch(FRHIRayTracingPipelineState* InRayTracingPipelineState, FRHIRayTracingShader* RayGenShaderRHI,
	FRHIShaderBindingTable* InSBT, const FRayTracingShaderBindings& GlobalResourceBindings,
	uint32 Width, uint32 Height)
{
	TRACE_CPUPROFILER_EVENT_SCOPE(RHIRayTraceDispatch);

	const FD3D12RayTracingPipelineState* Pipeline = FD3D12DynamicRHI::ResourceCast(InRayTracingPipelineState);

	// Resolve the per-device shader table and make sure that no uncommitted changes are pending.
	FD3D12RayTracingShaderBindingTable* ShaderBindingTable = FD3D12DynamicRHI::ResourceCast(InSBT);
	FD3D12RayTracingShaderBindingTableInternal* DeviceShaderTable = ShaderBindingTable->GetTableForDevice(GetParentDevice());
	checkf(!DeviceShaderTable->bIsDirty, TEXT("The shader table contains pending modifications. CommitRayTracingBindings must be called after SetRayTracingBindings"));

	// Find the ray generation shader inside the pipeline by its hash.
	FD3D12RayTracingShader* RayGenShader = FD3D12DynamicRHI::ResourceCast(RayGenShaderRHI);
	const int32 RayGenShaderIndex = Pipeline->RayGenShaders.Find(RayGenShader->GetHash());
	checkf(RayGenShaderIndex != INDEX_NONE,
		TEXT("RayGen shader '%s' is not present in the given ray tracing pipeline. ")
		TEXT("All RayGen shaders must be declared when creating RTPSO."),
		*(RayGenShader->EntryPoint));

	// Build the dispatch description from the shader table and fill in the dimensions.
	D3D12_DISPATCH_RAYS_DESC RaysDesc = DeviceShaderTable->GetDispatchRaysDesc(GetParentDevice(), Pipeline->RayGenShaders.Identifiers[RayGenShaderIndex]);
	RaysDesc.Width = Width;
	RaysDesc.Height = Height;
	RaysDesc.Depth = 1;

	DispatchRays(*this, GlobalResourceBindings, Pipeline, RayGenShaderIndex, DeviceShaderTable, RaysDesc, QueueType);
}
/**
 * Indirect ray tracing dispatch: the dispatch dimensions are read from ArgumentBuffer at ArgumentOffset.
 * Requires GRHISupportsRayTracingDispatchIndirect, a committed shader binding table and a ray generation
 * shader that is part of the pipeline.
 */
void FD3D12CommandContext::RHIRayTraceDispatchIndirect(FRHIRayTracingPipelineState* InRayTracingPipelineState, FRHIRayTracingShader* RayGenShaderRHI,
	FRHIShaderBindingTable* InSBT, const FRayTracingShaderBindings& GlobalResourceBindings,
	FRHIBuffer* ArgumentBuffer, uint32 ArgumentOffset)
{
	TRACE_CPUPROFILER_EVENT_SCOPE(RHIRayTraceDispatchIndirect);

	checkf(GRHISupportsRayTracingDispatchIndirect, TEXT("RHIRayTraceDispatchIndirect may not be used because DXR 1.1 is not supported on this machine."));

	// DispatchRays() selects the indirect path purely on the argument buffer being non-null,
	// so a null buffer would silently turn into a direct dispatch with unspecified dimensions.
	checkf(ArgumentBuffer != nullptr, TEXT("RHIRayTraceDispatchIndirect requires a valid indirect argument buffer."));

	const FD3D12RayTracingPipelineState* Pipeline = FD3D12DynamicRHI::ResourceCast(InRayTracingPipelineState);

	FD3D12RayTracingShaderBindingTable* SBT = FD3D12DynamicRHI::ResourceCast(InSBT);
	FD3D12RayTracingShaderBindingTableInternal* ShaderTableForDevice = SBT->GetTableForDevice(GetParentDevice());
	checkf(!ShaderTableForDevice->bIsDirty, TEXT("The shader table contains pending modifications. CommitRayTracingBindings must be called after SetRayTracingBindings"));

	FD3D12RayTracingShader* RayGenShader = FD3D12DynamicRHI::ResourceCast(RayGenShaderRHI);
	const int32 RayGenShaderIndex = Pipeline->RayGenShaders.Find(RayGenShader->GetHash());
	// Same diagnostic as the direct dispatch path, including the name of the offending shader.
	checkf(RayGenShaderIndex != INDEX_NONE,
		TEXT("RayGen shader '%s' is not present in the given ray tracing pipeline. ")
		TEXT("All RayGen shaders must be declared when creating RTPSO."),
		*(RayGenShader->EntryPoint));

	const FD3D12ShaderIdentifier& RayGenShaderIdentifier = Pipeline->RayGenShaders.Identifiers[RayGenShaderIndex];

	// Dimensions are not set here: DispatchRays() copies them from the argument buffer on the GPU.
	D3D12_DISPATCH_RAYS_DESC DispatchDesc = ShaderTableForDevice->GetDispatchRaysDesc(GetParentDevice(), RayGenShaderIdentifier);

	DispatchRays(*this, GlobalResourceBindings, Pipeline, RayGenShaderIndex, ShaderTableForDevice, DispatchDesc, QueueType, RetrieveObject<FD3D12Buffer>(ArgumentBuffer), ArgumentOffset);
}
/**
 * Writes one hit group record of the shader binding table.
 *
 * @param Device                 Device that owns the shader table.
 * @param ShaderTable            Shader table that receives the record.
 * @param RecordIndex            Index of the hit group record to write.
 * @param Pipeline               Pipeline providing the hit group shader and identifier; RTPSO bindings are skipped when null.
 * @param HitGroupIndex          Index of the hit group inside Pipeline->HitGroupShaders.
 * @param Geometry               Geometry whose system parameters are written to the record.
 * @param GeometrySegmentIndex   Segment inside Geometry that this record belongs to.
 * @param NumUniformBuffers / UniformBuffers          Uniform buffers bound through the local root signature.
 * @param LooseParameterDataSize / LooseParameterData Loose parameter blob for the hit group shader.
 * @param UserData               User data written together with the geometry system parameters.
 * @param BindingType            Lifetime of this binding (transient / persistent).
 * @param WorkerIndex            Worker slot used for the record cache and descriptor allocation.
 *
 * When resource binding fails the record receives the null shader identifier.
 */
static void SetRayTracingHitGroup(
	FD3D12Device* Device,
	FD3D12RayTracingShaderBindingTableInternal* ShaderTable, uint32 RecordIndex,
	FD3D12RayTracingPipelineState* Pipeline, uint32 HitGroupIndex,
	const FD3D12RayTracingGeometry* Geometry, uint32 GeometrySegmentIndex,
	uint32 NumUniformBuffers, FRHIUniformBuffer* const* UniformBuffers,
	uint32 LooseParameterDataSize, const void* LooseParameterData,
	uint32 UserData,
	ERayTracingLocalShaderBindingType BindingType,
	uint32 WorkerIndex)
{
	// If Shader table doesn't support hit group indexing then only set the hit group identifier and it should be first record index
	if (ShaderTable->HitGroupIndexingMode == ERayTracingHitGroupIndexingMode::Disallow)
	{
		check(RecordIndex == 0 && Pipeline);
		if (Pipeline)
		{
			ShaderTable->SetHitGroupIdentifier(RecordIndex, Pipeline->HitGroupShaders.Identifiers[HitGroupIndex]);
		}
		return;
	}

	checkf(RecordIndex < ShaderTable->NumHitRecords, TEXT("Hit group record index is invalid. Make sure that NumGeometrySegments and NumShaderSlotsPerGeometrySegment is correct in FRayTracingShaderBindingTableInitializer."));

#if DO_CHECK
	{
		const uint32 NumGeometrySegments = Geometry->GetNumSegments();
		checkf(GeometrySegmentIndex < NumGeometrySegments, TEXT("Segment %d is out of range for ray tracing geometry '%s' that contains %d segments"),
			GeometrySegmentIndex, Geometry->DebugName.IsNone() ? TEXT("UNKNOWN") : *Geometry->DebugName.ToString(), NumGeometrySegments);

		// If SBT and binding are persistent then all uniform buffers need to be allocated multiframe
		if (ShaderTable->Lifetime == ERayTracingShaderBindingTableLifetime::Persistent && BindingType == ERayTracingLocalShaderBindingType::Persistent)
		{
			// Validate the uniform buffer object that belongs to the device being bound, not always GPU 0.
			const uint32 GPUIndex = Device->GetGPUIndex();
			for (uint32 UBIndex = 0; UBIndex < NumUniformBuffers; ++UBIndex)
			{
				FRHIUniformBuffer* Resource = UniformBuffers[UBIndex];
				if (Resource)
				{
					FD3D12UniformBuffer* UB = FD3D12CommandContext::RetrieveObject<FD3D12UniformBuffer>(Resource, GPUIndex);
					checkf(UB->UniformBufferUsage == UniformBuffer_MultiFrame, TEXT("Trying to bind non multiframe uniform buffer to persistent SBT: %s"), *Resource->GetLayout().GetDebugName());
				}
			}
		}
	}
#endif // DO_CHECK

	ShaderTable->SetHitGroupGeometrySystemParameters(WorkerIndex, BindingType, RecordIndex, Geometry, UserData, GeometrySegmentIndex);

	if (EnumHasAnyFlags(ShaderTable->ShaderBindingMode, ERayTracingShaderBindingMode::RTPSO) && Pipeline)
	{
		const FD3D12RayTracingShader* Shader = Pipeline->HitGroupShaders.Shaders[HitGroupIndex];

		FD3D12RayTracingShaderBindingTableInternal::FShaderRecordCacheKey CacheKey;

		// TODO: disable RecordCache when using persistent SBT
		const bool bCanUseRecordCache = GRayTracingCacheShaderRecords
			&& LooseParameterDataSize == 0 // loose parameters end up in unique constant buffers, so SBT records can't be shared
			&& NumUniformBuffers > 0 // there is no benefit from cache if no resources are being bound
			&& NumUniformBuffers <= CacheKey.MaxUniformBuffers
			&& (BindingType == ERayTracingLocalShaderBindingType::Transient || ShaderTable->Lifetime == ERayTracingShaderBindingTableLifetime::Transient); // Only transient SBTs or Transient records can use the record cache because otherwise the per record listeners need to tracked and copied as well

		ERayTracingLocalShaderBindingType RTPSOBindingType = BindingType;

		if (bCanUseRecordCache)
		{
			CacheKey = FD3D12RayTracingShaderBindingTableInternal::FShaderRecordCacheKey(NumUniformBuffers, UniformBuffers, HitGroupIndex);

			uint32* ExistingRecordIndex = ShaderTable->WorkerData[WorkerIndex].ShaderRecordCache.Find(CacheKey);
			if (ExistingRecordIndex)
			{
				// Simply copy local shader parameters from existing SBT record and set the shader identifier, skipping resource binding work.
				const uint32 OffsetFromRootSignatureStart = sizeof(FD3D12HitGroupSystemParameters);
				ShaderTable->SetHitGroupIdentifier(RecordIndex, Pipeline->HitGroupShaders.Identifiers[HitGroupIndex]);
				ShaderTable->CopyHitGroupParameters(RecordIndex, *ExistingRecordIndex, OffsetFromRootSignatureStart);

#if DO_CHECK
				// Validate all other data against copied data
				RTPSOBindingType = ERayTracingLocalShaderBindingType::Validation;
#else
				return;
#endif // DO_CHECK
			}
		}

		FD3D12RayTracingLocalResourceBinder ResourceBinder(*Device, *ShaderTable, *(Shader->LocalRootSignature), RTPSOBindingType, RecordIndex, WorkerIndex, ERayTracingBindingType::HitGroup);
		const bool bResourcesBound = SetRayTracingShaderResources(Shader, Shader->LocalRootSignature,
			0, nullptr, // BindlessParameters
			0, nullptr, // Textures
			0, nullptr, // SRVs
			NumUniformBuffers, UniformBuffers,
			0, nullptr, // Samplers
			0, nullptr, // UAVs
			LooseParameterDataSize, LooseParameterData,
			ResourceBinder);

		// Remember successfully bound records so that identical bindings can be copied from them later.
		if (bCanUseRecordCache && bResourcesBound)
		{
			ShaderTable->WorkerData[WorkerIndex].ShaderRecordCache.FindOrAdd(CacheKey, RecordIndex);
		}

		// A record whose resources could not be bound gets the null identifier.
		FD3D12ShaderIdentifier ShaderIdentifier = bResourcesBound ? Pipeline->HitGroupShaders.Identifiers[HitGroupIndex] : FD3D12ShaderIdentifier::Null;
		if (RTPSOBindingType != ERayTracingLocalShaderBindingType::Validation)
		{
			ShaderTable->SetHitGroupIdentifier(RecordIndex, ShaderIdentifier);
		}
		else
		{
			// Validation pass: the identifier was already written from the cached record, only compare it.
			ShaderTable->CompareHitGroupIdentifier(RecordIndex, ShaderIdentifier);
		}
	}
}
/**
 * Writes one callable shader record: user data first, then the shader's local resources and finally
 * the shader identifier. The null identifier is written when no shader is specified
 * (ShaderIndexInPipeline == INDEX_NONE) or when its resources could not be bound.
 */
static void SetRayTracingCallableShader(
	FD3D12Device* Device,
	FD3D12RayTracingShaderBindingTableInternal* ShaderTable, uint32 RecordIndex,
	FD3D12RayTracingPipelineState* Pipeline, uint32 ShaderIndexInPipeline,
	uint32 NumUniformBuffers, FRHIUniformBuffer* const* UniformBuffers,
	uint32 LooseParameterDataSize, const void* LooseParameterData,
	uint32 UserData,
	uint32 WorkerIndex)
{
	checkf(RecordIndex < ShaderTable->NumCallableRecords, TEXT("Callable shader record index is invalid. Make sure that NumCallableShaderSlots is correct in FRayTracingShaderBindingTableInitializer."));

	// User data lives inside the root constants of the hit group system parameters.
	const uint32 RootConstantsOffset = offsetof(FD3D12HitGroupSystemParameters, RootConstants);
	const uint32 UserDataOffset = RootConstantsOffset + offsetof(FHitGroupSystemRootConstants, UserData);
	ShaderTable->SetCallableShaderParameters(RecordIndex, UserDataOffset, UserData);

	bool bUseShaderIdentifier = false;
	if (ShaderIndexInPipeline != INDEX_NONE)
	{
		const FD3D12RayTracingShader* CallableShader = Pipeline->CallableShaders.Shaders[ShaderIndexInPipeline];

		FD3D12RayTracingLocalResourceBinder LocalBinder(*Device, *ShaderTable, *(CallableShader->LocalRootSignature), ERayTracingLocalShaderBindingType::Transient, RecordIndex, WorkerIndex, ERayTracingBindingType::CallableShader);

		// Only uniform buffers and loose parameters are bound for callable shaders.
		bUseShaderIdentifier = SetRayTracingShaderResources(CallableShader, CallableShader->LocalRootSignature,
			0, nullptr, // BindlessParameters
			0, nullptr, // Textures
			0, nullptr, // SRVs
			NumUniformBuffers, UniformBuffers,
			0, nullptr, // Samplers
			0, nullptr, // UAVs
			LooseParameterDataSize, LooseParameterData, // Loose parameters
			LocalBinder);
	}

	const FD3D12ShaderIdentifier& RecordIdentifier = bUseShaderIdentifier
		? Pipeline->CallableShaders.Identifiers[ShaderIndexInPipeline]
		: FD3D12ShaderIdentifier::Null;
	ShaderTable->SetCallableIdentifier(RecordIndex, RecordIdentifier);
}
/**
 * Fills in a single miss shader record of the shader binding table.
 *
 * Writes the record's UserData root constant, binds the shader's local resources and then stores the
 * shader identifier. If SetRayTracingShaderResources reports failure, the null identifier is stored instead.
 *
 * @param Device                  Device handed to the local resource binder.
 * @param ShaderTable             Shader binding table whose miss record is written.
 * @param RecordIndex             Miss record slot; must be below ShaderTable->NumMissRecords.
 * @param Pipeline                Pipeline providing the miss shaders and their identifiers.
 * @param ShaderIndexInPipeline   Index into Pipeline->MissShaders. Used unconditionally, so it has to be a
 *                                valid index (INDEX_NONE is not special-cased here).
 * @param NumUniformBuffers       Number of entries in UniformBuffers.
 * @param UniformBuffers          Uniform buffers forwarded to SetRayTracingShaderResources.
 * @param LooseParameterDataSize  Size of LooseParameterData, forwarded to SetRayTracingShaderResources.
 * @param LooseParameterData      Loose parameter blob, forwarded to SetRayTracingShaderResources.
 * @param UserData                Value stored in the record's UserData root constant.
 * @param WorkerIndex             Index of the binding worker, handed to the local resource binder.
 */
static void SetRayTracingMissShader(
	FD3D12Device* Device,
	FD3D12RayTracingShaderBindingTableInternal* ShaderTable, uint32 RecordIndex,
	FD3D12RayTracingPipelineState* Pipeline, uint32 ShaderIndexInPipeline,
	uint32 NumUniformBuffers, FRHIUniformBuffer* const* UniformBuffers,
	uint32 LooseParameterDataSize, const void* LooseParameterData,
	uint32 UserData,
	uint32 WorkerIndex)
{
	checkf(RecordIndex < ShaderTable->NumMissRecords, TEXT("Miss shader record index is invalid. Make sure that NumMissShaderSlots is correct in FRayTracingShaderBindingTableInitializer."));

	// The user data lives in the UserData member of the root constants embedded in the system parameters.
	const uint32 UserDataByteOffset =
		offsetof(FD3D12HitGroupSystemParameters, RootConstants)
		+ offsetof(FHitGroupSystemRootConstants, UserData);
	ShaderTable->SetMissShaderParameters(RecordIndex, UserDataByteOffset, UserData);

	const FD3D12RayTracingShader* MissShader = Pipeline->MissShaders.Shaders[ShaderIndexInPipeline];

	FD3D12RayTracingLocalResourceBinder LocalBinder(
		*Device, *ShaderTable, *(MissShader->LocalRootSignature),
		ERayTracingLocalShaderBindingType::Transient,
		RecordIndex, WorkerIndex,
		ERayTracingBindingType::MissShader);

	const bool bBindingSucceeded = SetRayTracingShaderResources(MissShader, MissShader->LocalRootSignature,
		0, nullptr, // BindlessParameters
		0, nullptr, // Textures
		0, nullptr, // SRVs
		NumUniformBuffers, UniformBuffers,
		0, nullptr, // Samplers
		0, nullptr, // UAVs
		LooseParameterDataSize, LooseParameterData, // Loose parameters
		LocalBinder);

	if (bBindingSucceeded)
	{
		ShaderTable->SetMissIdentifier(RecordIndex, Pipeline->MissShaders.Identifiers[ShaderIndexInPipeline]);
	}
	else
	{
		// Resources could not be bound: store the null identifier rather than the shader's own.
		ShaderTable->SetMissIdentifier(RecordIndex, FD3D12ShaderIdentifier::Null);
	}
}
/**
 * Applies a batch of local shader bindings to the shader binding table of this context's device.
 *
 * Every entry of Bindings is interpreted according to BindingType:
 *  - HitGroup:       the hit record is set via SetRayTracingHitGroup, or cleared when the entry's own
 *                    BindingType is ERayTracingLocalShaderBindingType::Clear.
 *  - CallableShader: the callable record is set via SetRayTracingCallableShader (entry must be Transient).
 *  - MissShader:     the miss record is set via SetRayTracingMissShader (entry must be Transient).
 * Any other BindingType trips checkNoEntry().
 *
 * The entries are processed with ParallelForWithExistingTaskContext, one FTaskContext (carrying a worker index)
 * per potential worker. The table is flagged dirty afterwards.
 *
 * @param InSBT        Shader binding table to update.
 * @param InPipeline   Ray tracing pipeline that provides the shaders. The size check below tolerates nullptr;
 *                     NOTE(review): presumably only valid for batches that never dereference the pipeline - confirm against callers.
 * @param NumBindings  Number of entries in Bindings.
 * @param Bindings     Array of bindings to apply.
 * @param BindingType  Kind of shader record all entries of this batch refer to.
 */
void FD3D12CommandContext::RHISetBindingsOnShaderBindingTable(
	FRHIShaderBindingTable* InSBT, FRHIRayTracingPipelineState* InPipeline,
	uint32 NumBindings, const FRayTracingLocalShaderBindings* Bindings,
	ERayTracingBindingType BindingType)
{
	TRACE_CPUPROFILER_EVENT_SCOPE(RHISetBindingsOnShaderBindingTable);
	SCOPE_CYCLE_COUNTER(STAT_D3D12SetBindingsOnShaderBindingTable);

	FD3D12RayTracingShaderBindingTable* SBT = FD3D12DynamicRHI::ResourceCast(InSBT);
	FD3D12RayTracingPipelineState* Pipeline = FD3D12DynamicRHI::ResourceCast(InPipeline);

	// Pipeline shouldn't contain any shaders which have a bigger local data size than currently set in the initializer
	// (otherwise changing of the local binding data size would need to be supported).
	check(Pipeline == nullptr || Pipeline->MaxLocalRootSignatureSize <= SBT->GetInitializer().LocalBindingDataSize);

	FD3D12RayTracingShaderBindingTableInternal* ShaderTableForDevice = SBT->GetTableForDevice(GetParentDevice());

	// Number of task contexts: a single one when threading is not beneficial, otherwise the number of
	// task graph worker threads clamped to the table's MaxBindingWorkers.
	const uint32 NumWorkerThreads = FMath::Max<uint32>(1, FTaskGraphInterface::Get().GetNumWorkerThreads());
	const uint32 MaxTasks = FApp::ShouldUseThreadingForPerformance()
		? FMath::Min<uint32>(NumWorkerThreads, FD3D12RayTracingShaderBindingTableInternal::MaxBindingWorkers)
		: 1;

	struct FTaskContext
	{
		uint32 WorkerIndex = 0;
	};

	// Each context carries the worker index that is forwarded to the per-record setters below.
	TArray<FTaskContext, TInlineAllocator<FD3D12RayTracingShaderBindingTableInternal::MaxBindingWorkers>> TaskContexts;
	for (uint32 WorkerIndex = 0; WorkerIndex < MaxTasks; ++WorkerIndex)
	{
		TaskContexts.Add(FTaskContext{WorkerIndex});
	}

	auto BindingTask = [Bindings, Device = Device, ShaderTableForDevice, Pipeline, BindingType](const FTaskContext& Context, int32 CurrentIndex)
	{
		const FRayTracingLocalShaderBindings& Binding = Bindings[CurrentIndex];

		switch (BindingType)
		{
		case ERayTracingBindingType::HitGroup:
		{
			if (Binding.BindingType != ERayTracingLocalShaderBindingType::Clear)
			{
				//UE_LOG(LogD3D12RHI, Log, TEXT("Set hit record data for RecordIndex %d on SBT %#016llx with mode: %d"), Binding.RecordIndex, ShaderTableForDevice, Binding.BindingType);
				const FD3D12RayTracingGeometry* Geometry = FD3D12DynamicRHI::ResourceCast(Binding.Geometry);
				SetRayTracingHitGroup(Device,
					ShaderTableForDevice, Binding.RecordIndex,
					Pipeline, Binding.ShaderIndexInPipeline,
					Geometry, Binding.SegmentIndex,
					Binding.NumUniformBuffers,
					Binding.UniformBuffers,
					Binding.LooseParameterDataSize,
					Binding.LooseParameterData,
					Binding.UserData,
					Binding.BindingType,
					Context.WorkerIndex);
			}
			else
			{
				//UE_LOG(LogD3D12RHI, Log, TEXT("Clear hit record data for RecordIndex %d on SBT %#016llx"), Binding.RecordIndex, ShaderTableForDevice);
				ShaderTableForDevice->ClearHitRecordData(Context.WorkerIndex, Binding.RecordIndex);
			}
			break;
		}
		case ERayTracingBindingType::CallableShader:
		{
			check(Binding.BindingType == ERayTracingLocalShaderBindingType::Transient);
			SetRayTracingCallableShader(Device,
				ShaderTableForDevice, Binding.RecordIndex,
				Pipeline, Binding.ShaderIndexInPipeline,
				Binding.NumUniformBuffers,
				Binding.UniformBuffers,
				Binding.LooseParameterDataSize,
				Binding.LooseParameterData,
				Binding.UserData,
				Context.WorkerIndex);
			break;
		}
		case ERayTracingBindingType::MissShader:
		{
			check(Binding.BindingType == ERayTracingLocalShaderBindingType::Transient);
			SetRayTracingMissShader(Device,
				ShaderTableForDevice, Binding.RecordIndex,
				Pipeline, Binding.ShaderIndexInPipeline,
				Binding.NumUniformBuffers,
				Binding.UniformBuffers,
				Binding.LooseParameterDataSize,
				Binding.LooseParameterData,
				Binding.UserData,
				Context.WorkerIndex);
			break;
		}
		default:
		{
			checkNoEntry();
			break;
		}
		}
	};

	// One helper worker task will be created at most per this many work items, plus one worker for current thread (unless running on a task thread),
	// up to a hard maximum of FD3D12RayTracingShaderBindingTableInternal::MaxBindingWorkers.
	// Internally, parallel for tasks still subdivide the work into smaller chunks and perform fine-grained load-balancing.
	const int32 ItemsPerTask = 1024;
	ParallelForWithExistingTaskContext(TEXT("SetRayTracingBindings"), MakeArrayView(TaskContexts), NumBindings, ItemsPerTask, BindingTask);

	// Flag the table as modified now that its records have been rewritten.
	ShaderTableForDevice->bIsDirty = true;
}
#endif // D3D12_RHI_RAYTRACING