// Copyright Epic Games, Inc. All Rights Reserved. #include "MetalStateCache.h" #include "MetalRHIPrivate.h" #include "MetalRHIRenderQuery.h" #include "MetalShaderTypes.h" #include "MetalGraphicsPipelineState.h" #include "MetalProfiler.h" #include "MetalCommandBuffer.h" #include "MetalCommandEncoder.h" #include "MetalVertexDeclaration.h" #include "MetalBindlessDescriptors.h" #include "RHIShaderParametersShared.h" #include "MetalResourceCollection.h" #include "MetalDynamicRHI.h" #include "RHIUniformBufferUtilities.h" #if PLATFORM_MAC #ifndef UINT128_MAX #define UINT128_MAX (((__uint128_t)1 << 127) - (__uint128_t)1 + ((__uint128_t)1 << 127)) #endif #define FMETALTEXTUREMASK_MAX UINT128_MAX #else #define FMETALTEXTUREMASK_MAX UINT32_MAX #endif static MTL::TriangleFillMode TranslateFillMode(ERasterizerFillMode FillMode) { switch (FillMode) { case FM_Wireframe: return MTL::TriangleFillModeLines; case FM_Point: return MTL::TriangleFillModeFill; default: return MTL::TriangleFillModeFill; }; } static MTL::CullMode TranslateCullMode(ERasterizerCullMode CullMode) { switch (CullMode) { case CM_CCW: return MTL::CullModeFront; case CM_CW: return MTL::CullModeBack; default: return MTL::CullModeNone; } } static MTL::DepthClipMode TranslateDepthClipMode(ERasterizerDepthClipMode DepthClipMode) { switch (DepthClipMode) { case ERasterizerDepthClipMode::DepthClip: return MTL::DepthClipModeClip; case ERasterizerDepthClipMode::DepthClamp: return MTL::DepthClipModeClamp; default: return MTL::DepthClipModeClip; } } static MTL::StoreAction ConditionalOverrideStoreAction(MTL::StoreAction StoreAction, bool bIsDepth, bool bIsMSAA) { if (!bIsMSAA) { return StoreAction; } static int Mode = IConsoleManager::Get().FindTConsoleVariableDataInt(TEXT("r.Mobile.XRMSAAMode"))->GetValueOnAnyThread(); if (Mode != 0) { // @todo visionos: this needs to be experimented on to find best mode return MTL::StoreActionStoreAndMultisampleResolve; } return StoreAction; } FORCEINLINE MTL::StoreAction 
GetMetalRTStoreAction(FMetalDevice& Device, ERenderTargetStoreAction StoreAction) { switch(StoreAction) { case ERenderTargetStoreAction::ENoAction: return MTL::StoreActionDontCare; case ERenderTargetStoreAction::EStore: return MTL::StoreActionStore; //default store action in the desktop renderers needs to be MTL::StoreActionStoreAndMultisampleResolve. Trying to express the renderer by the requested maxrhishaderplatform //because we may render to the same MSAA target twice in two separate passes. BasePass, then some stuff, then translucency for example and we need to not lose the prior MSAA contents to do this properly. case ERenderTargetStoreAction::EMultisampleResolve: { static bool bNoMSAA = !AllowMSAA(); static bool bSupportsMSAAStoreResolve = Device.SupportsFeature(EMetalFeaturesMSAAStoreAndResolve) && (GMaxRHIFeatureLevel >= ERHIFeatureLevel::SM5); if (bNoMSAA) { return MTL::StoreActionStore; } else if (bSupportsMSAAStoreResolve) { return MTL::StoreActionStoreAndMultisampleResolve; } else { return MTL::StoreActionMultisampleResolve; } } default: return MTL::StoreActionDontCare; } } FORCEINLINE MTL::StoreAction GetConditionalMetalRTStoreAction(FMetalDevice& Device, bool bMSAATarget) { if (bMSAATarget) { //this func should only be getting called when an encoder had to abnormally break. In this case we 'must' do StoreAndResolve because the encoder will be restarted later //with the original MSAA rendertarget and the original data must still be there to continue the render properly. 
check(Device.SupportsFeature(EMetalFeaturesMSAAStoreAndResolve)); return MTL::StoreActionStoreAndMultisampleResolve; } else { return MTL::StoreActionStore; } } FMetalStateCache::FMetalStateCache(FMetalDevice& MetalDevice, bool const bInImmediate) : Device(MetalDevice) , DepthStore(MTL::StoreActionUnknown) , StencilStore(MTL::StoreActionUnknown) , VisibilityResults(nullptr) , VisibilityMode(MTL::VisibilityResultModeDisabled) , VisibilityOffset(0) , VisibilityWritten(0) , DepthStencilState(nullptr) , RasterizerState(nullptr) , StencilRef(0) , BlendFactor(FLinearColor::Transparent) , FrameBufferSize(CGSizeMake(0.0, 0.0)) , RenderTargetArraySize(1) , RenderPassDesc(nullptr) , RasterBits(0) , PipelineBits(0) , bIsRenderTargetActive(false) , bHasValidRenderTarget(false) , bHasValidColorTarget(false) , bImmediate(bInImmediate) { FMemory::Memzero(Viewport); FMemory::Memzero(Scissor); ActiveViewports = 0; ActiveScissors = 0; for (uint32 i = 0; i < MaxSimultaneousRenderTargets; i++) { ColorStore[i] = MTL::StoreActionUnknown; } FMemory::Memzero(RenderPassInfo); FMemory::Memzero(DirtyUniformBuffers); #if PLATFORM_SUPPORTS_BINDLESS_RENDERING // Reset Vertex Buffer Offsets. 
for (uint32 i = 0; i < UE_ARRAY_COUNT(VertexBufferVAs); i++) { VertexBufferVAs[i].addr = 0; VertexBufferVAs[i].stride = 0; VertexBufferVAs[i].length = 0; } // Clear CBV table for (uint32 Frequency = 0; Frequency < EMetalShaderStages::Num; Frequency++) { for (uint32 i = 0; i < TopLevelABNumEntry; i++) { CBVTable[Frequency][i] = 0ull; } } #endif } FMetalStateCache::~FMetalStateCache() { RenderPassDesc = nullptr; for (uint32 i = 0; i < MaxVertexElementCount; i++) { VertexBuffers[i].Buffer = nullptr; VertexBuffers[i].Bytes = nullptr; #if METAL_RHI_RAYTRACING VertexBuffers[i].AccelerationStructure = nullptr; #endif VertexBuffers[i].Length = 0; VertexBuffers[i].Offset = 0; } for (uint32 Frequency = 0; Frequency < EMetalShaderStages::Num; Frequency++) { ShaderSamplers[Frequency].Bound = 0; for (uint32 i = 0; i < ML_MaxSamplers; i++) { ShaderSamplers[Frequency].Samplers[i] = nullptr; } for (uint32 i = 0; i < ML_MaxBuffers; i++) { BoundUniformBuffers[Frequency][i] = nullptr; ShaderBuffers[Frequency].Buffers[i].Buffer = nullptr; #if METAL_RHI_RAYTRACING ShaderBuffers[Frequency].Buffers[i].AccelerationStructure = nullptr; #endif ShaderBuffers[Frequency].Buffers[i].Bytes = nullptr; ShaderBuffers[Frequency].Buffers[i].Length = 0; ShaderBuffers[Frequency].Buffers[i].ElementRowPitch = 0; ShaderBuffers[Frequency].Buffers[i].Offset = 0; ShaderBuffers[Frequency].Buffers[i].Usage = MTL::ResourceUsage(0); ShaderBuffers[Frequency].Formats[i] = PF_Unknown; } ShaderBuffers[Frequency].Bound = 0; for (uint32 i = 0; i < ML_MaxTextures; i++) { ShaderTextures[Frequency].Textures[i] = nullptr; ShaderTextures[Frequency].Usage[i] = MTL::ResourceUsage(0); } ShaderTextures[Frequency].Bound = 0; } VisibilityResults = nullptr; } void FMetalStateCache::Reset() { SampleCount = 0; FMemory::Memzero(Viewport); FMemory::Memzero(Scissor); ActiveViewports = 0; ActiveScissors = 0; FMemory::Memzero(RenderPassInfo); bIsRenderTargetActive = false; bHasValidRenderTarget = false; bHasValidColorTarget = false; 
FMemory::Memzero(DirtyUniformBuffers); FMemory::Memzero(BoundUniformBuffers); ActiveUniformBuffers.Empty(); for (uint32 i = 0; i < MaxVertexElementCount; i++) { VertexBuffers[i].Buffer = nullptr; VertexBuffers[i].Bytes = nullptr; #if METAL_RHI_RAYTRACING VertexBuffers[i].AccelerationStructure = nullptr; #endif VertexBuffers[i].Length = 0; VertexBuffers[i].Offset = 0; } for (uint32 Frequency = 0; Frequency < EMetalShaderStages::Num; Frequency++) { ShaderSamplers[Frequency].Bound = 0; for (uint32 i = 0; i < ML_MaxSamplers; i++) { ShaderSamplers[Frequency].Samplers[i] = nullptr; } for (uint32 i = 0; i < ML_MaxBuffers; i++) { ShaderBuffers[Frequency].Buffers[i].Buffer = nullptr; #if METAL_RHI_RAYTRACING ShaderBuffers[Frequency].Buffers[i].AccelerationStructure = nullptr; #endif ShaderBuffers[Frequency].Buffers[i].Bytes = nullptr; ShaderBuffers[Frequency].Buffers[i].Length = 0; ShaderBuffers[Frequency].Buffers[i].ElementRowPitch = 0; ShaderBuffers[Frequency].Buffers[i].Offset = 0; ShaderBuffers[Frequency].Formats[i] = PF_Unknown; } ShaderBuffers[Frequency].Bound = 0; for (uint32 i = 0; i < ML_MaxTextures; i++) { ShaderTextures[Frequency].Textures[i] = nullptr; ShaderTextures[Frequency].Usage[i] = MTL::ResourceUsage(0); } ShaderTextures[Frequency].Bound = 0; } VisibilityResults = nullptr; VisibilityMode = MTL::VisibilityResultModeDisabled; VisibilityOffset = 0; VisibilityWritten = 0; DepthStencilState.SafeRelease(); RasterizerState.SafeRelease(); GraphicsPSO.SafeRelease(); ComputeShader.SafeRelease(); PreviousComputeShader.SafeRelease(); DepthStencilSurface.SafeRelease(); StencilRef = 0; if(RenderPassDesc) { FMetalRenderPassDescriptorPool::Get().ReleaseDescriptor(RenderPassDesc); } RenderPassDesc = nullptr; for (uint32 i = 0; i < MaxSimultaneousRenderTargets; i++) { ColorStore[i] = MTL::StoreActionUnknown; } DepthStore = MTL::StoreActionUnknown; StencilStore = MTL::StoreActionUnknown; BlendFactor = FLinearColor::Transparent; FrameBufferSize = CGSizeMake(0.0, 0.0); 
RenderTargetArraySize = 0; RasterBits = EMetalRenderFlagMask; PipelineBits = EMetalPipelineFlagMask; #if PLATFORM_SUPPORTS_BINDLESS_RENDERING if(GIsMetalInitialized) { FMetalBindlessDescriptorManager* BindlessDescriptorManager = Device.GetBindlessDescriptorManager(); if(BindlessDescriptorManager->IsSupported()) { for (uint32 i = 0; i < UE_ARRAY_COUNT(VertexBufferVAs); i++) { VertexBufferVAs[i].addr = 0; VertexBufferVAs[i].stride = 0; VertexBufferVAs[i].length = 0; } // Clear CBV table for (uint32 Frequency = 0; Frequency < EMetalShaderStages::Num; Frequency++) { for (uint32 i = 0; i < TopLevelABNumEntry; i++) { CBVTable[Frequency][i] = 0ull; } } } } #endif } static bool MTLScissorRectEqual(MTL::ScissorRect const& Left, MTL::ScissorRect const& Right) { return Left.x == Right.x && Left.y == Right.y && Left.width == Right.width && Left.height == Right.height; } void FMetalStateCache::SetScissorRect(bool const bEnable, MTL::ScissorRect const& Rect) { if (bEnable) { Scissor[0] = Rect; } else { Scissor[0].x = Viewport[0].originX; Scissor[0].y = Viewport[0].originY; Scissor[0].width = Viewport[0].width; Scissor[0].height = Viewport[0].height; } // Clamp to framebuffer size - Metal doesn't allow scissor to be larger. Scissor[0].x = Scissor[0].x; Scissor[0].y = Scissor[0].y; // FrameBufferSize will be 0 if using RT-less rasterization, so ignore if(FrameBufferSize.width != 0 && FrameBufferSize.height != 0) { Scissor[0].width = FMath::Max((Scissor[0].x + Scissor[0].width <= FMath::RoundToInt32(FrameBufferSize.width)) ? Scissor[0].width : FMath::RoundToInt32(FrameBufferSize.width) - Scissor[0].x, (NS::UInteger)1u); Scissor[0].height = FMath::Max((Scissor[0].y + Scissor[0].height <= FMath::RoundToInt32(FrameBufferSize.height)) ? 
Scissor[0].height : FMath::RoundToInt32(FrameBufferSize.height) - Scissor[0].y, (NS::UInteger)1u); } RasterBits |= EMetalRenderFlagScissorRect; ActiveScissors = 1; } void FMetalStateCache::SetBlendFactor(FLinearColor const& InBlendFactor) { if(BlendFactor != InBlendFactor) { BlendFactor = InBlendFactor; RasterBits |= EMetalRenderFlagBlendColor; } } void FMetalStateCache::SetStencilRef(uint32 const InStencilRef) { if(StencilRef != InStencilRef) { StencilRef = InStencilRef; RasterBits |= EMetalRenderFlagStencilReferenceValue; } } void FMetalStateCache::SetDepthStencilState(FMetalDepthStencilState* InDepthStencilState) { if(DepthStencilState != InDepthStencilState) { DepthStencilState = InDepthStencilState; RasterBits |= EMetalRenderFlagDepthStencilState; } } void FMetalStateCache::SetRasterizerState(FMetalRasterizerState* InRasterizerState) { if(RasterizerState != InRasterizerState) { RasterizerState = InRasterizerState; #if PLATFORM_VISIONOS RasterBits |= EMetalRenderFlagFrontFacingWinding|EMetalRenderFlagCullMode|EMetalRenderFlagDepthBias|EMetalRenderFlagTriangleFillMode; #else RasterBits |= EMetalRenderFlagFrontFacingWinding|EMetalRenderFlagCullMode|EMetalRenderFlagDepthBias|EMetalRenderFlagTriangleFillMode|EMetalRenderFlagDepthClipMode; #endif } } void FMetalStateCache::SetComputeShader(FMetalComputeShader* InComputeShader) { if(ComputeShader != InComputeShader) { ComputeShader = InComputeShader; PipelineBits |= EMetalPipelineFlagComputeShader; DirtyUniformBuffers[EMetalShaderStages::Compute] = 0xffffffff; for (uint32 Index = 0; Index < ML_MaxTextures; ++Index) { ShaderTextures[EMetalShaderStages::Compute].Textures[Index] = nullptr; ShaderTextures[EMetalShaderStages::Compute].Usage[Index] = MTL::ResourceUsage(0); } ShaderTextures[EMetalShaderStages::Compute].Bound = 0; for (const auto& PackedGlobalArray : InComputeShader->Bindings.PackedGlobalArrays) { 
ShaderParameters[EMetalShaderStages::Compute].PrepareGlobalUniforms(CrossCompiler::PackedTypeNameToTypeIndex(PackedGlobalArray.TypeName), PackedGlobalArray.Size); } } } bool FMetalStateCache::SetRenderPassInfo(FRHIRenderPassInfo const& InRenderTargets, FMetalQueryBuffer* QueryBuffer) { bool bNeedsSet = false; // see if our new Info matches our previous Info if (NeedsToSetRenderTarget(InRenderTargets)) { bool bNeedsClear = false; //Create local store action states if we support deferred store MTL::StoreAction NewColorStore[MaxSimultaneousRenderTargets]; for (uint32 i = 0; i < MaxSimultaneousRenderTargets; ++i) { NewColorStore[i] = MTL::StoreActionUnknown; } MTL::StoreAction NewDepthStore = MTL::StoreActionUnknown; MTL::StoreAction NewStencilStore = MTL::StoreActionUnknown; // back this up for next frame RenderPassInfo = InRenderTargets; if(RenderPassDesc) { FMetalRenderPassDescriptorPool::Get().ReleaseDescriptor(RenderPassDesc); } // at this point, we need to fully set up an encoder/command buffer, so make a new one (autoreleased) MTL::RenderPassDescriptor* RenderPass = FMetalRenderPassDescriptorPool::Get().CreateDescriptor(); // if we need to do queries, write to the supplied query buffer { VisibilityResults = QueryBuffer; RenderPass->setVisibilityResultBuffer(QueryBuffer ? 
QueryBuffer->Buffer->GetMTLBuffer() : nullptr); } if (QueryBuffer != VisibilityResults) { VisibilityOffset = 0; VisibilityWritten = 0; } // default to non-msaa int32 OldCount = SampleCount; SampleCount = 0; bIsRenderTargetActive = false; bHasValidRenderTarget = false; bHasValidColorTarget = false; uint8 ArrayTargets = 0; uint8 BoundTargets = 0; uint32 ArrayRenderLayers = UINT_MAX; bool bFramebufferSizeSet = false; FrameBufferSize = CGSizeMake(0.f, 0.f); MTL::RenderPassColorAttachmentDescriptorArray* Attachments = RenderPass->colorAttachments(); uint32 NumColorRenderTargets = RenderPassInfo.GetNumColorRenderTargets(); // If we don't have RT or Depth we need to set default raster sample count if(!NumColorRenderTargets && RenderPassInfo.DepthStencilRenderTarget.DepthStencilTarget == nullptr) { uint32_t RTWidth = FMath::Max(RenderPassInfo.ResolveRect.X2, GetViewport(0).width); uint32_t RTHeight = FMath::Max(RenderPassInfo.ResolveRect.Y2, GetViewport(0).height); RenderPass->setRenderTargetWidth(RTWidth); RenderPass->setRenderTargetHeight(RTHeight); RenderPass->setDefaultRasterSampleCount(1); } else { RenderPass->setRenderTargetWidth(0); RenderPass->setRenderTargetHeight(0); } for (uint32 RenderTargetIndex = 0; RenderTargetIndex < MaxSimultaneousRenderTargets; RenderTargetIndex++) { // default to invalid uint8 FormatKey = 0; // only try to set it if it was one that was set (ie less than RenderPassInfo.NumColorRenderTargets) if (RenderTargetIndex < NumColorRenderTargets && RenderPassInfo.ColorRenderTargets[RenderTargetIndex].RenderTarget != nullptr) { const FRHIRenderPassInfo::FColorEntry& RenderTargetView = RenderPassInfo.ColorRenderTargets[RenderTargetIndex]; ColorTargets[RenderTargetIndex] = RenderTargetView.RenderTarget; ResolveTargets[RenderTargetIndex] = RenderTargetView.ResolveTarget; FMetalSurface& Surface = *GetMetalSurfaceFromRHITexture(RenderTargetView.RenderTarget); FormatKey = Surface.FormatKey; uint32 Width = FMath::Max((uint32)(Surface.GetDesc().Extent.X >> 
RenderTargetView.MipIndex), (uint32)1); uint32 Height = FMath::Max((uint32)(Surface.GetDesc().Extent.Y >> RenderTargetView.MipIndex), (uint32)1); if(!bFramebufferSizeSet) { bFramebufferSizeSet = true; FrameBufferSize.width = Width; FrameBufferSize.height = Height; } else { FrameBufferSize.width = FMath::Min(FrameBufferSize.width, (CGFloat)Width); FrameBufferSize.height = FMath::Min(FrameBufferSize.height, (CGFloat)Height); } // if this is the back buffer, make sure we have a usable drawable ConditionalUpdateBackBuffer(Surface); FMetalSurface* ResolveSurface = GetMetalSurfaceFromRHITexture(RenderTargetView.ResolveTarget); if (ResolveSurface) { ConditionalUpdateBackBuffer(*ResolveSurface); } BoundTargets |= 1 << RenderTargetIndex; #if !PLATFORM_MAC if (Surface.Texture.get() == nullptr) { SampleCount = OldCount; return true; } #endif // The surface cannot be nullptr - we have to have a valid render-target array after this call. check (Surface.Texture); // user code generally passes -1 as a default, but we need 0 uint32 ArraySliceIndex = RenderTargetView.ArraySlice == 0xFFFFFFFF ? 
0 : RenderTargetView.ArraySlice; switch(Surface.GetDesc().Dimension) { case ETextureDimension::Texture2DArray: case ETextureDimension::Texture3D: case ETextureDimension::TextureCube: case ETextureDimension::TextureCubeArray: if(RenderTargetView.ArraySlice == 0xFFFFFFFF) { ArrayTargets |= (1 << RenderTargetIndex); ArrayRenderLayers = FMath::Min(ArrayRenderLayers, Surface.GetNumFaces()); } else { ArrayRenderLayers = 1; } break; default: ArrayRenderLayers = 1; break; } MTL::RenderPassColorAttachmentDescriptor* ColorAttachment = Attachments->object(RenderTargetIndex); ERenderTargetStoreAction HighLevelStoreAction = GetStoreAction(RenderTargetView.Action); ERenderTargetLoadAction HighLevelLoadAction = GetLoadAction(RenderTargetView.Action); // on iOS with memory-less MSAA textures we can't load them // in case high level code wants to load and render to MSAA target, set attachment to a resolved texture bool bUseResolvedTexture = false; #if PLATFORM_IOS bUseResolvedTexture = ( Surface.MSAATexture && Surface.MSAATexture->storageMode() == MTL::StorageModeMemoryless && HighLevelLoadAction == ERenderTargetLoadAction::ELoad); #endif bool bMemoryless = false; if (Surface.MSAATexture && !bUseResolvedTexture) { #if PLATFORM_IOS if (Surface.MSAATexture->storageMode() == MTL::StorageModeMemoryless) { bMemoryless = true; HighLevelLoadAction = ERenderTargetLoadAction::EClear; } #endif // set up an MSAA attachment ColorAttachment->setTexture(Surface.MSAATexture.get()); NewColorStore[RenderTargetIndex] = GetMetalRTStoreAction(Device, ERenderTargetStoreAction::EMultisampleResolve); MTL::StoreAction FinalStoreAction = !bMemoryless && GRHIDeviceId > 2 ? MTL::StoreActionUnknown : NewColorStore[RenderTargetIndex]; FinalStoreAction = ConditionalOverrideStoreAction(FinalStoreAction, false, !!Surface.MSAATexture); ColorAttachment->setStoreAction(FinalStoreAction); ColorAttachment->setResolveTexture(Surface.MSAAResolveTexture ? 
Surface.MSAAResolveTexture.get() : Surface.Texture.get()); SampleCount = Surface.MSAATexture->sampleCount(); } else { #if PLATFORM_IOS if (Surface.Texture->storageMode() == MTL::StorageModeMemoryless) { bMemoryless = true; HighLevelStoreAction = ERenderTargetStoreAction::ENoAction; HighLevelLoadAction = ERenderTargetLoadAction::EClear; } #endif // set up non-MSAA attachment ColorAttachment->setTexture(Surface.Texture.get()); NewColorStore[RenderTargetIndex] = GetMetalRTStoreAction(Device, HighLevelStoreAction); ColorAttachment->setStoreAction(!bMemoryless ? MTL::StoreActionUnknown : NewColorStore[RenderTargetIndex]); SampleCount = 1; } ColorAttachment->setLevel(RenderTargetView.MipIndex); if(Surface.GetDesc().IsTexture3D()) { ColorAttachment->setSlice(0); ColorAttachment->setDepthPlane(ArraySliceIndex); } else { ColorAttachment->setSlice(ArraySliceIndex); } ColorAttachment->setLoadAction((Surface.Written || !bImmediate) ? GetMetalRTLoadAction(HighLevelLoadAction) : MTL::LoadActionClear); FPlatformAtomics::InterlockedExchange(&Surface.Written, 1); bNeedsClear |= (ColorAttachment->loadAction() == MTL::LoadActionClear); const FClearValueBinding& ClearValue = RenderPassInfo.ColorRenderTargets[RenderTargetIndex].RenderTarget->GetClearBinding(); if (ClearValue.ColorBinding == EClearBinding::EColorBound) { const FLinearColor& ClearColor = ClearValue.GetClearColor(); ColorAttachment->setClearColor(MTL::ClearColor(ClearColor.R, ClearColor.G, ClearColor.B, ClearColor.A)); } bHasValidRenderTarget = true; bHasValidColorTarget = true; } else { ColorTargets[RenderTargetIndex].SafeRelease(); ResolveTargets[RenderTargetIndex].SafeRelease(); } } RenderTargetArraySize = 1; if(ArrayTargets) { if (!Device.SupportsFeature(EMetalFeaturesLayeredRendering)) { METAL_FATAL_ASSERT(ArrayRenderLayers != 1, TEXT("Layered rendering is unsupported on this device (%d)."), ArrayRenderLayers); } #if PLATFORM_MAC else { METAL_FATAL_ASSERT(ArrayTargets == BoundTargets, TEXT("All color render targets 
must be layered when performing multi-layered rendering under Metal (%d != %d)."), ArrayTargets, BoundTargets); RenderTargetArraySize = ArrayRenderLayers; RenderPass->setRenderTargetArrayLength(ArrayRenderLayers); } #endif } // default to invalid uint8 DepthFormatKey = 0; uint8 StencilFormatKey = 0; // setup depth and/or stencil if (RenderPassInfo.DepthStencilRenderTarget.DepthStencilTarget != nullptr) { FMetalSurface& Surface = *GetMetalSurfaceFromRHITexture(RenderPassInfo.DepthStencilRenderTarget.DepthStencilTarget); switch(Surface.GetDesc().Dimension) { case ETextureDimension::Texture2DArray: case ETextureDimension::Texture3D: case ETextureDimension::TextureCube: case ETextureDimension::TextureCubeArray: ArrayRenderLayers = Surface.GetNumFaces(); break; default: ArrayRenderLayers = 1; break; } if(!ArrayTargets && ArrayRenderLayers > 1) { METAL_FATAL_ASSERT(Device.SupportsFeature(EMetalFeaturesLayeredRendering), TEXT("Layered rendering is unsupported on this device (%d)."), ArrayRenderLayers); #if PLATFORM_MAC RenderTargetArraySize = ArrayRenderLayers; RenderPass->setRenderTargetArrayLength(ArrayRenderLayers); #endif } if(!bFramebufferSizeSet) { bFramebufferSizeSet = true; FrameBufferSize.width = Surface.GetDesc().Extent.X; FrameBufferSize.height = Surface.GetDesc().Extent.Y; } else { FrameBufferSize.width = FMath::Min(FrameBufferSize.width, (CGFloat)Surface.GetDesc().Extent.X); FrameBufferSize.height = FMath::Min(FrameBufferSize.height, (CGFloat)Surface.GetDesc().Extent.Y); } EPixelFormat DepthStencilPixelFormat = RenderPassInfo.DepthStencilRenderTarget.DepthStencilTarget->GetFormat(); MTL::Texture* DepthTexture = nullptr; MTL::Texture* StencilTexture = nullptr; const bool bSupportSeparateMSAAResolve = FMetalCommandQueue::SupportsSeparateMSAAAndResolveTarget(); uint32 DepthSampleCount = (Surface.MSAATexture ? 
Surface.MSAATexture->sampleCount() : Surface.Texture->sampleCount()); bool bDepthStencilSampleCountMismatchFixup = false; DepthTexture = Surface.MSAATexture ? Surface.MSAATexture.get() : Surface.Texture.get(); if (SampleCount == 0) { SampleCount = DepthSampleCount; } else if (SampleCount != DepthSampleCount) { static bool bLogged = false; if (!bSupportSeparateMSAAResolve) { //in the case of NOT support separate MSAA resolve the high level may legitimately cause a mismatch which we need to handle by binding the resolved target which we normally wouldn't do. DepthTexture = Surface.Texture.get(); bDepthStencilSampleCountMismatchFixup = true; DepthSampleCount = 1; } else if (!bLogged) { UE_LOG(LogMetal, Error, TEXT("If we support separate targets the high level should always give us matching counts")); bLogged = true; } } switch (DepthStencilPixelFormat) { case PF_X24_G8: case PF_DepthStencil: case PF_D24: { MTL::PixelFormat DepthStencilFormat = Surface.Texture ? (MTL::PixelFormat)Surface.Texture->pixelFormat() : MTL::PixelFormatInvalid; switch(DepthStencilFormat) { case MTL::PixelFormatDepth32Float: StencilTexture = nullptr; break; case MTL::PixelFormatStencil8: StencilTexture = DepthTexture; break; case MTL::PixelFormatDepth32Float_Stencil8: StencilTexture = DepthTexture; break; #if PLATFORM_MAC case MTL::PixelFormatDepth24Unorm_Stencil8: StencilTexture = DepthTexture; break; #endif default: break; } break; } case PF_ShadowDepth: { break; } default: break; } float DepthClearValue = 0.0f; uint32 StencilClearValue = 0; const FClearValueBinding& ClearValue = RenderPassInfo.DepthStencilRenderTarget.DepthStencilTarget->GetClearBinding(); if (ClearValue.ColorBinding == EClearBinding::EDepthStencilBound) { ClearValue.GetDepthStencil(DepthClearValue, StencilClearValue); } else if(!ArrayTargets && ArrayRenderLayers > 1) { DepthClearValue = 1.0f; } bool const bCombinedDepthStencilUsingStencil = (DepthTexture && (MTL::PixelFormat)DepthTexture->pixelFormat() != 
MTL::PixelFormatDepth32Float && RenderPassInfo.DepthStencilRenderTarget.ExclusiveDepthStencil.IsUsingStencil()); bool const bUsingDepth = (RenderPassInfo.DepthStencilRenderTarget.ExclusiveDepthStencil.IsUsingDepth() || (bCombinedDepthStencilUsingStencil)); if (DepthTexture && bUsingDepth) { MTL::RenderPassDepthAttachmentDescriptor* DepthAttachment = MTL::RenderPassDepthAttachmentDescriptor::alloc()->init(); check(DepthAttachment); DepthFormatKey = Surface.FormatKey; ERenderTargetActions DepthActions = GetDepthActions(RenderPassInfo.DepthStencilRenderTarget.Action); ERenderTargetLoadAction DepthLoadAction = GetLoadAction(DepthActions); ERenderTargetStoreAction DepthStoreAction = GetStoreAction(DepthActions); // set up the depth attachment DepthAttachment->setTexture(DepthTexture); DepthAttachment->setLoadAction(GetMetalRTLoadAction(DepthLoadAction)); bNeedsClear |= (DepthAttachment->loadAction() == MTL::LoadActionClear); ERenderTargetStoreAction HighLevelStoreAction = (Surface.MSAATexture && !bDepthStencilSampleCountMismatchFixup) ? 
ERenderTargetStoreAction::EMultisampleResolve : DepthStoreAction; if (bUsingDepth && (HighLevelStoreAction == ERenderTargetStoreAction::ENoAction || bDepthStencilSampleCountMismatchFixup)) { if (DepthSampleCount > 1) { HighLevelStoreAction = ERenderTargetStoreAction::EMultisampleResolve; } else { HighLevelStoreAction = ERenderTargetStoreAction::EStore; } } const bool bSupportsMSAADepthResolve = Device.SupportsFeature(EMetalFeaturesMSAADepthResolve); bool bDepthTextureMemoryless = false; #if PLATFORM_IOS bDepthTextureMemoryless = DepthTexture->storageMode() == MTL::StorageModeMemoryless; if (bDepthTextureMemoryless) { DepthAttachment->setLoadAction(MTL::LoadActionClear); if (bSupportsMSAADepthResolve && Surface.MSAATexture && DepthStoreAction == ERenderTargetStoreAction::EMultisampleResolve) { HighLevelStoreAction = ERenderTargetStoreAction::EMultisampleResolve; } else { HighLevelStoreAction = ERenderTargetStoreAction::ENoAction; } } else { HighLevelStoreAction = DepthStoreAction; } #endif //needed to quiet the metal validation that runs when you end renderpass. (it requires some kind of 'resolve' for an msaa target) //But with deferredstore we don't set the real one until submit time. NewDepthStore = !Surface.MSAATexture || bSupportsMSAADepthResolve ? GetMetalRTStoreAction(Device, HighLevelStoreAction) : MTL::StoreActionDontCare; MTL::StoreAction FinalStoreAction = !bDepthTextureMemoryless && Surface.MSAATexture && GRHIDeviceId > 2 ? MTL::StoreActionUnknown : NewDepthStore; FinalStoreAction = ConditionalOverrideStoreAction(FinalStoreAction, true, !!Surface.MSAATexture); DepthAttachment->setStoreAction(FinalStoreAction); DepthAttachment->setClearDepth(DepthClearValue); check(SampleCount > 0); if (Surface.MSAATexture && bSupportsMSAADepthResolve && DepthAttachment->storeAction() != MTL::StoreActionDontCare) { if (!bDepthStencilSampleCountMismatchFixup) { DepthAttachment->setResolveTexture(Surface.MSAAResolveTexture ? 
Surface.MSAAResolveTexture.get() : Surface.Texture.get()); } #if PLATFORM_MAC //would like to assert and do manual custom resolve, but that is causing some kind of weird corruption. //checkf(false, TEXT("Depth resolves need to do 'max' for correctness. MacOS does not expose this yet unless the spec changed.")); #else DepthAttachment->setDepthResolveFilter(MTL::MultisampleDepthResolveFilterMax); #endif } bHasValidRenderTarget = true; // and assign it RenderPass->setDepthAttachment(DepthAttachment); DepthAttachment->release(); } //if we're dealing with a samplecount mismatch we just bail on stencil entirely as stencil //doesn't have an autoresolve target to use. bool const bCombinedDepthStencilUsingDepth = (StencilTexture && StencilTexture->pixelFormat() != MTL::PixelFormatStencil8 && RenderPassInfo.DepthStencilRenderTarget.ExclusiveDepthStencil.IsUsingDepth()); bool const bUsingStencil = RenderPassInfo.DepthStencilRenderTarget.ExclusiveDepthStencil.IsUsingStencil() || (bCombinedDepthStencilUsingDepth); if (StencilTexture && bUsingStencil) { MTL::RenderPassStencilAttachmentDescriptor* StencilAttachment = MTL::RenderPassStencilAttachmentDescriptor::alloc()->init(); StencilFormatKey = Surface.FormatKey; ERenderTargetActions StencilActions = GetStencilActions(RenderPassInfo.DepthStencilRenderTarget.Action); ERenderTargetLoadAction StencilLoadAction = GetLoadAction(StencilActions); ERenderTargetStoreAction StencilStoreAction = GetStoreAction(StencilActions); // set up the stencil attachment StencilAttachment->setTexture(StencilTexture); StencilAttachment->setLoadAction(GetMetalRTLoadAction(StencilLoadAction)); bNeedsClear |= (StencilAttachment->loadAction() == MTL::LoadActionClear); ERenderTargetStoreAction HighLevelStoreAction = StencilStoreAction; if (bUsingStencil && (HighLevelStoreAction == ERenderTargetStoreAction::ENoAction || bDepthStencilSampleCountMismatchFixup)) { HighLevelStoreAction = ERenderTargetStoreAction::EStore; } bool bStencilMemoryless = false; #if 
PLATFORM_IOS if (StencilTexture->storageMode() == MTL::StorageModeMemoryless) { bStencilMemoryless = true; HighLevelStoreAction = ERenderTargetStoreAction::ENoAction; StencilAttachment->setLoadAction(MTL::LoadActionClear); } else { HighLevelStoreAction = StencilStoreAction; } #endif // For the case where Depth+Stencil is MSAA we can't Resolve depth and Store stencil - we can only Resolve + DontCare or StoreResolve + Store (on newer H/W and iOS). // We only allow use of StoreResolve in the Desktop renderers as the mobile renderer does not and should not assume hardware support for it. NewStencilStore = (StencilTexture->sampleCount() == 1 || GetMetalRTStoreAction(Device, ERenderTargetStoreAction::EMultisampleResolve) == MTL::StoreActionStoreAndMultisampleResolve) ? GetMetalRTStoreAction(Device, HighLevelStoreAction) : MTL::StoreActionDontCare; bool bStoreAction = !bStencilMemoryless && StencilTexture->sampleCount() > 1 && GRHIDeviceId > 2; StencilAttachment->setStoreAction(bStoreAction ? MTL::StoreActionUnknown : NewStencilStore); StencilAttachment->setClearStencil(StencilClearValue); if (SampleCount == 0) { SampleCount = StencilAttachment->texture()->sampleCount(); } bHasValidRenderTarget = true; // and assign it RenderPass->setStencilAttachment(StencilAttachment); StencilAttachment->release(); } } //Update deferred store states if required otherwise they're already set directly on the Metal Attachement Descriptors { for (uint32 i = 0; i < MaxSimultaneousRenderTargets; ++i) { ColorStore[i] = NewColorStore[i]; } DepthStore = NewDepthStore; StencilStore = NewStencilStore; } if (SampleCount == 0) { SampleCount = 1; } bIsRenderTargetActive = bHasValidRenderTarget; // Only start encoding if the render target state is valid if (bHasValidRenderTarget) { // Retain and/or release the depth-stencil surface in case it is a temporary surface for a draw call that writes to depth without a depth/stencil buffer bound. 
DepthStencilSurface = RenderPassInfo.DepthStencilRenderTarget.DepthStencilTarget; DepthStencilResolve = RenderPassInfo.DepthStencilRenderTarget.ResolveTarget; } else { DepthStencilSurface.SafeRelease(); DepthStencilResolve.SafeRelease(); } RenderPassDesc = RenderPass; bNeedsSet = true; } return bNeedsSet; } void FMetalStateCache::InvalidateRenderTargets(void) { bHasValidRenderTarget = false; bHasValidColorTarget = false; bIsRenderTargetActive = false; } void FMetalStateCache::SetRenderTargetsActive(bool const bActive) { bIsRenderTargetActive = bActive; } static bool MTLViewportEqual(MTL::Viewport const& Left, MTL::Viewport const& Right) { return FMath::IsNearlyEqual(Left.originX, Right.originX) && FMath::IsNearlyEqual(Left.originY, Right.originY) && FMath::IsNearlyEqual(Left.width, Right.width) && FMath::IsNearlyEqual(Left.height, Right.height) && FMath::IsNearlyEqual(Left.znear, Right.znear) && FMath::IsNearlyEqual(Left.zfar, Right.zfar); } void FMetalStateCache::SetViewport(const MTL::Viewport& InViewport) { if (!MTLViewportEqual(Viewport[0], InViewport)) { Viewport[0] = InViewport; RasterBits |= EMetalRenderFlagViewport; } ActiveViewports = 1; MTL::ScissorRect Rect; Rect.x = InViewport.originX; Rect.y = InViewport.originY; Rect.width = InViewport.width; Rect.height = InViewport.height; SetScissorRect(false, Rect); } void FMetalStateCache::SetViewport(uint32 Index, const MTL::Viewport& InViewport) { check(Index < ML_MaxViewports); if (!MTLViewportEqual(Viewport[Index], InViewport)) { Viewport[Index] = InViewport; RasterBits |= EMetalRenderFlagViewport; } // There may not be gaps in the viewport array. ActiveViewports = Index + 1; // This always sets the scissor rect because the RHI doesn't bother to expose proper scissor states for multiple viewports. // This will have to change if we want to guarantee correctness in the mid to long term. 
{ MTL::ScissorRect Rect; Rect.x = InViewport.originX; Rect.y = InViewport.originY; Rect.width = InViewport.width; Rect.height = InViewport.height; SetScissorRect(Index, false, Rect); } } void FMetalStateCache::SetScissorRect(uint32 Index, bool const bEnable, MTL::ScissorRect const& Rect) { check(Index < ML_MaxViewports); if (!MTLScissorRectEqual(Scissor[Index], Rect)) { // There's no way we can setup the bounds correctly - that must be done by the caller or incorrect rendering & crashes will ensue. Scissor[Index] = Rect; RasterBits |= EMetalRenderFlagScissorRect; } ActiveScissors = Index + 1; } void FMetalStateCache::SetViewports(const MTL::Viewport InViewport[], uint32 Count) { check(Count >= 1 && Count < ML_MaxViewports); // Check if the count has changed first & if so mark for a rebind if (ActiveViewports != Count) { RasterBits |= EMetalRenderFlagViewport; RasterBits |= EMetalRenderFlagScissorRect; } for (uint32 i = 0; i < Count; i++) { SetViewport(i, InViewport[i]); } ActiveViewports = Count; } void FMetalStateCache::SetVertexStream(uint32 const Index, FMetalBufferPtr Buffer, FMetalBufferData* Bytes, uint32 const Offset, uint32 const Length) { check(Index < MaxVertexElementCount); check(UNREAL_TO_METAL_BUFFER_INDEX(Index) < MaxMetalStreams); if (Buffer) { VertexBuffers[Index].Buffer = Buffer; } else { VertexBuffers[Index].Buffer = nullptr; } VertexBuffers[Index].Offset = 0; VertexBuffers[Index].Bytes = Bytes; VertexBuffers[Index].Length = Length; #if METAL_RHI_RAYTRACING VertexBuffers[Index].AccelerationStructure = nullptr; #endif #if METAL_USE_METAL_SHADER_CONVERTER FMetalBindlessDescriptorManager* BindlessDescriptorManager = Device.GetBindlessDescriptorManager(); if(IsMetalBindlessEnabled()) { // Update GPU VA (assuming the offset has changed since last time). 
if (Buffer || Bytes)
		{
			if (Bytes != nil)
			{
				// CPU-side data: upload the bytes from Offset onwards into a side buffer and publish that buffer's GPU address.
				uint8 const* BytesWithOffset = (((uint8 const*)VertexBuffers[Index].Bytes->Data) + Offset);
				uint32 Len = VertexBuffers[Index].Bytes->Len - Offset;

				FMetalBufferPtr SideBuffer = IRSideUploadToBuffer(BytesWithOffset, Len);
				CacheOrSkipResourceResidencyUpdate(SideBuffer->GetMTLBuffer(), EMetalShaderStages::Vertex, true);

				VertexBufferVAs[Index].addr = SideBuffer->GetGPUAddress();
				VertexBufferVAs[Index].length = Length;
			}
			else
			{
				// GPU buffer: the published address is the buffer's GPU address advanced by Offset.
				VertexBufferVAs[Index].addr = VertexBuffers[Index].Buffer->GetGPUAddress() + Offset;
				VertexBufferVAs[Index].length = Length;

				CacheOrSkipResourceResidencyUpdate(VertexBuffers[Index].Buffer->GetMTLBuffer(), EMetalShaderStages::Vertex, true);
			}
		}
		else
		{
			// Nothing bound: clear the slot's address/length pair.
			VertexBufferVAs[Index].addr = 0;
			VertexBufferVAs[Index].length = 0;
		}
	}
	else
#endif
	{
		// Non-bindless path: bind through the regular vertex-stage buffer table.
		SetShaderBuffer(EMetalShaderStages::Vertex, VertexBuffers[Index].Buffer, Bytes, Offset, Length, UNREAL_TO_METAL_BUFFER_INDEX(Index), MTL::ResourceUsageRead);
	}
}

/** Returns the Length last recorded by SetVertexStream for the stream at Index. */
uint32 FMetalStateCache::GetVertexBufferSize(uint32 const Index)
{
	check(Index < MaxVertexElementCount);
	check(UNREAL_TO_METAL_BUFFER_INDEX(Index) < MaxMetalStreams);
	return VertexBuffers[Index].Length;
}

/**
 * Switches the cached graphics pipeline state. Nothing happens when State is already current;
 * otherwise all uniform buffers of the graphics stages are marked dirty, the pipeline flag is raised,
 * and the depth-stencil / rasterizer state and packed global uniforms are re-prepared from State.
 */
void FMetalStateCache::SetGraphicsPipelineState(FMetalGraphicsPipelineState* State)
{
	if (GraphicsPSO != State)
	{
		GraphicsPSO = State;

		// Every uniform buffer slot of each graphics stage must be re-resolved against the new shaders.
		DirtyUniformBuffers[EMetalShaderStages::Vertex] = 0xffffffff;
		DirtyUniformBuffers[EMetalShaderStages::Pixel] = 0xffffffff;
#if PLATFORM_SUPPORTS_GEOMETRY_SHADERS
		DirtyUniformBuffers[EMetalShaderStages::Geometry] = 0xffffffff;
#endif
#if PLATFORM_SUPPORTS_MESH_SHADERS
		DirtyUniformBuffers[EMetalShaderStages::Mesh] = 0xffffffff;
		DirtyUniformBuffers[EMetalShaderStages::Amplification] = 0xffffffff;
#endif

		PipelineBits |= EMetalPipelineFlagPipelineState;

		// At the "reset on bind" debugging level, additionally mark every buffer/texture/sampler slot as bound.
		if (Device.GetRuntimeDebuggingLevel() >= EMetalDebugLevelResetOnBind)
		{
			for (uint32 i = 0; i < EMetalShaderStages::Num; i++)
			{
				ShaderBuffers[i].Bound = UINT32_MAX;
				ShaderTextures[i].Bound = FMETALTEXTUREMASK_MAX;
				ShaderSamplers[i].Bound =
UINT16_MAX; } } SetDepthStencilState(State->DepthStencilState); SetRasterizerState(State->RasterizerState); #if PLATFORM_SUPPORTS_MESH_SHADERS if (State->MeshShader) { for (const auto& PackedGlobalArray : State->MeshShader->Bindings.PackedGlobalArrays) { ShaderParameters[EMetalShaderStages::Mesh].PrepareGlobalUniforms(CrossCompiler::PackedTypeNameToTypeIndex(PackedGlobalArray.TypeName), PackedGlobalArray.Size); } if (State->AmplificationShader) { for (const auto& PackedGlobalArray : State->AmplificationShader->Bindings.PackedGlobalArrays) { ShaderParameters[EMetalShaderStages::Amplification].PrepareGlobalUniforms(CrossCompiler::PackedTypeNameToTypeIndex(PackedGlobalArray.TypeName), PackedGlobalArray.Size); } } } else if (State->VertexShader) #endif { for (const auto& PackedGlobalArray : State->VertexShader->Bindings.PackedGlobalArrays) { ShaderParameters[EMetalShaderStages::Vertex].PrepareGlobalUniforms(CrossCompiler::PackedTypeNameToTypeIndex(PackedGlobalArray.TypeName), PackedGlobalArray.Size); } } if (State->PixelShader) { for (const auto& PackedGlobalArray : State->PixelShader->Bindings.PackedGlobalArrays) { ShaderParameters[EMetalShaderStages::Pixel].PrepareGlobalUniforms(CrossCompiler::PackedTypeNameToTypeIndex(PackedGlobalArray.TypeName), PackedGlobalArray.Size); } } } } FMetalShaderPipeline* FMetalStateCache::GetPipelineState() const { return GraphicsPSO->GetPipeline().Get(); } EPrimitiveType FMetalStateCache::GetPrimitiveType() { check(IsValidRef(GraphicsPSO)); return GraphicsPSO->GetPrimitiveType(); } void FMetalStateCache::BindUniformBuffer(EMetalShaderStages const Freq, uint32 const BufferIndex, FRHIUniformBuffer* BufferRHI) { check(BufferIndex < ML_MaxBuffers); if (BoundUniformBuffers[Freq][BufferIndex] != BufferRHI) { ActiveUniformBuffers.Add(BufferRHI); BoundUniformBuffers[Freq][BufferIndex] = BufferRHI; DirtyUniformBuffers[Freq] |= 1 << BufferIndex; } } void FMetalStateCache::SetVisibilityResultMode(MTL::VisibilityResultMode const Mode, NS::UInteger 
const Offset) { if (VisibilityMode != Mode || VisibilityOffset != Offset) { VisibilityMode = Mode; VisibilityOffset = Offset; RasterBits |= EMetalRenderFlagVisibilityResultMode; } } void FMetalStateCache::ConditionalUpdateBackBuffer(FMetalSurface& Surface) { // are we setting the back buffer? if so, make sure we have the drawable if (EnumHasAnyFlags(Surface.GetDesc().Flags, TexCreate_Presentable)) { // update the back buffer texture the first time used this frame if (Surface.Texture.get() == nullptr) { // set the texture into the backbuffer Surface.GetDrawableTexture(); } #if PLATFORM_MAC check (Surface.Texture); #endif } } bool FMetalStateCache::NeedsToSetRenderTarget(const FRHIRenderPassInfo& InRenderPassInfo) { // see if our new Info matches our previous Info uint32 CurrentNumColorRenderTargets = RenderPassInfo.GetNumColorRenderTargets(); uint32 NewNumColorRenderTargets = InRenderPassInfo.GetNumColorRenderTargets(); // basic checks bool bAllChecksPassed = GetHasValidRenderTarget() && bIsRenderTargetActive && CurrentNumColorRenderTargets == NewNumColorRenderTargets && (InRenderPassInfo.DepthStencilRenderTarget.DepthStencilTarget == RenderPassInfo.DepthStencilRenderTarget.DepthStencilTarget); // now check each color target if the basic tests passe if (bAllChecksPassed) { for (int32 RenderTargetIndex = 0; RenderTargetIndex < NewNumColorRenderTargets; RenderTargetIndex++) { const FRHIRenderPassInfo::FColorEntry& RenderTargetView = InRenderPassInfo.ColorRenderTargets[RenderTargetIndex]; const FRHIRenderPassInfo::FColorEntry& PreviousRenderTargetView = RenderPassInfo.ColorRenderTargets[RenderTargetIndex]; // handle simple case of switching textures or mip/slice if (RenderTargetView.RenderTarget != PreviousRenderTargetView.RenderTarget || RenderTargetView.ResolveTarget != PreviousRenderTargetView.ResolveTarget || RenderTargetView.MipIndex != PreviousRenderTargetView.MipIndex || RenderTargetView.ArraySlice != PreviousRenderTargetView.ArraySlice) { bAllChecksPassed = 
false; break; } // it's non-trivial when we need to switch based on load/store action: // LoadAction - it only matters what we are switching to in the new one // If we switch to Load, no need to switch as we can re-use what we already have // If we switch to Clear, we have to always switch to a new RT to force the clear // If we switch to DontCare, there's definitely no need to switch // If we switch *from* Clear then we must change target as we *don't* want to clear again. if (GetLoadAction(RenderTargetView.Action) == ERenderTargetLoadAction::EClear) { bAllChecksPassed = false; break; } // StoreAction - this matters what the previous one was **In Spirit** // If we come from Store, we need to switch to a new RT to force the store // If we come from DontCare, then there's no need to switch // @todo metal: However, we basically only use Store now, and don't // care about intermediate results, only final, so we don't currently check the value // if (PreviousRenderTargetView.StoreAction == ERenderTTargetStoreAction::EStore) // { // bAllChecksPassed = false; // break; // } } if (InRenderPassInfo.DepthStencilRenderTarget.DepthStencilTarget && (GetLoadAction(GetDepthActions(InRenderPassInfo.DepthStencilRenderTarget.Action)) == ERenderTargetLoadAction::EClear || GetLoadAction(GetStencilActions(InRenderPassInfo.DepthStencilRenderTarget.Action)) == ERenderTargetLoadAction::EClear)) { bAllChecksPassed = false; } if (InRenderPassInfo.DepthStencilRenderTarget.DepthStencilTarget && (GetStoreAction(GetDepthActions(InRenderPassInfo.DepthStencilRenderTarget.Action)) > GetStoreAction(GetDepthActions(RenderPassInfo.DepthStencilRenderTarget.Action)) || GetStoreAction(GetStencilActions(InRenderPassInfo.DepthStencilRenderTarget.Action)) > GetStoreAction(GetStencilActions(RenderPassInfo.DepthStencilRenderTarget.Action)))) { // Don't break the encoder if we can just change the store actions. 
MTL::StoreAction NewDepthStore = DepthStore; MTL::StoreAction NewStencilStore = StencilStore; if (GetStoreAction(GetDepthActions(InRenderPassInfo.DepthStencilRenderTarget.Action)) > GetStoreAction(GetDepthActions(RenderPassInfo.DepthStencilRenderTarget.Action))) { if (RenderPassDesc->depthAttachment()->texture()) { FMetalSurface& Surface = *GetMetalSurfaceFromRHITexture(RenderPassInfo.DepthStencilRenderTarget.DepthStencilTarget); const uint32 DepthSampleCount = (Surface.MSAATexture ? Surface.MSAATexture->sampleCount() : Surface.Texture->sampleCount()); bool const bDepthStencilSampleCountMismatchFixup = (SampleCount != DepthSampleCount); ERenderTargetStoreAction HighLevelStoreAction = (Surface.MSAATexture && !bDepthStencilSampleCountMismatchFixup) ? ERenderTargetStoreAction::EMultisampleResolve : GetStoreAction(GetDepthActions(RenderPassInfo.DepthStencilRenderTarget.Action)); #if PLATFORM_IOS MTLTexturePtr Tex = Surface.MSAATexture ? Surface.MSAATexture : Surface.Texture; if (Tex->storageMode() == MTL::StorageModeMemoryless) { HighLevelStoreAction = ERenderTargetStoreAction::ENoAction; } #endif NewDepthStore = GetMetalRTStoreAction(Device, HighLevelStoreAction); } else { bAllChecksPassed = false; } } if (GetStoreAction(GetStencilActions(InRenderPassInfo.DepthStencilRenderTarget.Action)) > GetStoreAction(GetStencilActions(RenderPassInfo.DepthStencilRenderTarget.Action))) { if (RenderPassDesc->stencilAttachment()->texture()) { NewStencilStore = GetMetalRTStoreAction(Device, GetStoreAction(GetStencilActions(RenderPassInfo.DepthStencilRenderTarget.Action))); #if PLATFORM_IOS if (RenderPassDesc->stencilAttachment()->texture()->storageMode() == MTL::StorageModeMemoryless) { NewStencilStore = GetMetalRTStoreAction(Device, ERenderTargetStoreAction::ENoAction); } #endif } else { bAllChecksPassed = false; } } if (bAllChecksPassed) { DepthStore = NewDepthStore; StencilStore = NewStencilStore; } } } // if we are setting them to nothing, then this is probably end of frame, and 
we can't make a framebuffer // with nothng, so just abort this (only need to check on single MRT case) if (NewNumColorRenderTargets == 1 && InRenderPassInfo.ColorRenderTargets[0].RenderTarget == nullptr && InRenderPassInfo.DepthStencilRenderTarget.DepthStencilTarget == nullptr) { bAllChecksPassed = true; } return bAllChecksPassed == false; } void FMetalStateCache::SetShaderBuffer( EMetalShaderStages const Frequency , FMetalBufferPtr Buffer , FMetalBufferData* const Bytes , NS::UInteger const Offset , NS::UInteger const Length , NS::UInteger const Index , MTL::ResourceUsage const Usage , EPixelFormat const Format , NS::UInteger const ElementRowPitch , TArray> ReferencedResources ) { check(Frequency < EMetalShaderStages::Num); check(Index < ML_MaxBuffers); if (ShaderBuffers[Frequency].Buffers[Index].Buffer != Buffer || ShaderBuffers[Frequency].Buffers[Index].Bytes != Bytes || ShaderBuffers[Frequency].Buffers[Index].Offset != Offset || ShaderBuffers[Frequency].Buffers[Index].Length != Length || ShaderBuffers[Frequency].Buffers[Index].ElementRowPitch != ElementRowPitch || !(ShaderBuffers[Frequency].Buffers[Index].Usage & Usage) || ShaderBuffers[Frequency].Formats[Index] != Format) { ShaderBuffers[Frequency].Buffers[Index].Buffer = Buffer; ShaderBuffers[Frequency].Buffers[Index].Bytes = Bytes; #if METAL_RHI_RAYTRACING ShaderBuffers[Frequency].Buffers[Index].AccelerationStructure = nullptr; #endif ShaderBuffers[Frequency].Buffers[Index].ReferencedResources = ReferencedResources; ShaderBuffers[Frequency].Buffers[Index].Offset = Offset; ShaderBuffers[Frequency].Buffers[Index].Length = Length; ShaderBuffers[Frequency].Buffers[Index].ElementRowPitch = ElementRowPitch; ShaderBuffers[Frequency].Buffers[Index].Usage = Usage; ShaderBuffers[Frequency].Formats[Index] = Format; if (Buffer || Bytes) { ShaderBuffers[Frequency].Bound |= (1 << Index); } else { ShaderBuffers[Frequency].Bound &= ~(1 << Index); } } } #if METAL_RHI_RAYTRACING void 
FMetalStateCache::SetShaderBuffer(EMetalShaderStages const Frequency, MTL::AccelerationStructure* AccelerationStructure, NS::UInteger const Index, TArray> ReferencedResources) { check(Frequency < EMetalShaderStages::Num); check(Index < ML_MaxBuffers); if (ShaderBuffers[Frequency].Buffers[Index].AccelerationStructure.GetPtr() != AccelerationStructure.GetPtr()) { ShaderBuffers[Frequency].Buffers[Index].AccelerationStructure = AccelerationStructure; ShaderBuffers[Frequency].Buffers[Index].Buffer = nullptr; ShaderBuffers[Frequency].Buffers[Index].Bytes = nullptr; ShaderBuffers[Frequency].Buffers[Index].ReferencedResources = ReferencedResources; ShaderBuffers[Frequency].Buffers[Index].Offset = 0; ShaderBuffers[Frequency].Buffers[Index].Length = 0; ShaderBuffers[Frequency].Buffers[Index].Usage = MTL::ResourceUsage(0); ShaderBuffers[Frequency].Formats[Index] = PF_Unknown; if (AccelerationStructure) { ShaderBuffers[Frequency].Bound |= (1 << Index); } else { ShaderBuffers[Frequency].Bound &= ~(1 << Index); } } } #endif // METAL_RHI_RAYTRACING static bool CanMakeTextureResidentViaHeaps(const MTL::Texture* Texture) { return !(Texture->usage() & MTL::TextureUsageRenderTarget) && !(Texture->usage() & MTL::TextureUsageShaderWrite); } #if METAL_USE_METAL_SHADER_CONVERTER void FMetalStateCache::CacheOrSkipResourceResidencyUpdate(MTL::Resource* InResource, EMetalShaderStages const Frequency, bool bReadOnly, bool bForceUseResource) { bool bAlreadyInSet = false; if (!bForceUseResource && bReadOnly && InResource->heap()) { HeapsUsedByStage[Frequency].Add(InResource->heap(), &bAlreadyInSet); } else { TSet& StageResources = bReadOnly ? 
ROResourcesByStage[Frequency] : RWResourcesByStage[Frequency]; if (!StageResources.Contains(InResource)) { StageResources.Add(InResource); } } } void FMetalStateCache::IRMakeSRVResident(EMetalShaderStages const Frequency, FMetalShaderResourceView* SRV) { FMetalBindlessDescriptorManager* BindlessDescriptorManager = Device.GetBindlessDescriptorManager(); if(!IsMetalBindlessEnabled()) { return; } switch (SRV->GetMetalType()) { case FMetalResourceViewBase::EMetalType::Null: checkf(false, TEXT("Attempt to bind a null SRV.")); break; case FMetalResourceViewBase::EMetalType::TextureView: { auto const& View = SRV->GetTextureView(); IRMakeTextureResident(Frequency, View.get()); break; } case FMetalResourceViewBase::EMetalType::BufferView: { auto const& View = SRV->GetBufferView(); CacheOrSkipResourceResidencyUpdate(View.Buffer->GetMTLBuffer(), Frequency, true); break; } #if 0 case FMetalResourceViewBase::EMetalType::AccelerationStructure: { MTL::AccelerationStructure* AccelerationStructure = SRV->GetAccelerationStructure(); AddUsedResource(AccelerationStructure, MTL::ResourceUsageRead, SRV->ReferencedResources); break; } #endif }; } void FMetalStateCache::IRMakeUAVResident(EMetalShaderStages const Frequency, FMetalUnorderedAccessView* UAV) { FMetalBindlessDescriptorManager* BindlessDescriptorManager = Device.GetBindlessDescriptorManager(); if(!IsMetalBindlessEnabled()) { return; } switch (UAV->GetMetalType()) { case FMetalResourceViewBase::EMetalType::Null: checkf(false, TEXT("Attempt to bind a null UAV. 
%s"), *UAV->GetOwnerName().ToString()); break; case FMetalResourceViewBase::EMetalType::TextureView: { auto const& View = UAV->GetTextureView(); if (View->buffer()) { CacheOrSkipResourceResidencyUpdate(View->buffer(), Frequency, false); } else if (View->parentTexture()) { CacheOrSkipResourceResidencyUpdate(View->parentTexture(), Frequency, false); } CacheOrSkipResourceResidencyUpdate(View.get(), Frequency, false); break; } case FMetalResourceViewBase::EMetalType::BufferView: { auto const& View = UAV->GetBufferView(); CacheOrSkipResourceResidencyUpdate(View.Buffer->GetMTLBuffer(), Frequency, false); break; } case FMetalResourceViewBase::EMetalType::TextureBufferBacked: { auto const& View = UAV->GetTextureBufferBacked(); CacheOrSkipResourceResidencyUpdate(View.Buffer->GetMTLBuffer(), Frequency, false); CacheOrSkipResourceResidencyUpdate(View.Texture.get(),Frequency, false); break; } #if METAL_RHI_RAYTRACING case FMetalResourceViewBase::EMetalType::AccelerationStructure: checkNoEntry(); // not implemented break; #endif default: checkNoEntry(); break; } if (UAV->IsTexture()) { // @TODO: this needs refactoring. 
FPlatformAtomics::InterlockedExchange(&ResourceCast(UAV->GetTexture())->Written, 1); } } void FMetalStateCache::IRMakeTextureResident(EMetalShaderStages const Frequency, MTL::Texture* Texture) { FMetalBindlessDescriptorManager* BindlessDescriptorManager = Device.GetBindlessDescriptorManager(); if (Texture->buffer()) { CacheOrSkipResourceResidencyUpdate(Texture->buffer(), Frequency, true); } else if (Texture->parentTexture()) { CacheOrSkipResourceResidencyUpdate(Texture->parentTexture(), Frequency, true, CanMakeTextureResidentViaHeaps(Texture->parentTexture())); } { CacheOrSkipResourceResidencyUpdate(Texture, Frequency, true, CanMakeTextureResidentViaHeaps(Texture)); } } void FMetalStateCache::IRForwardBindlessParameters(EMetalShaderStages const Frequency, TConstArrayView InBindlessParameters) { FMetalBindlessDescriptorManager* BindlessDescriptorManager = Device.GetBindlessDescriptorManager(); // Collect resources we need to map for this frame. for (FRHIShaderParameterResource const& Parameter : InBindlessParameters) { if(Parameter.Type == FRHIShaderParameterResource::EType::UniformBuffer) { continue; } const FRHIDescriptorHandle Handle = UE::RHICore::GetBindlessParameterHandle(Parameter); if (Handle.IsValid()) { if (Parameter.Type != FRHIShaderParameterResource::EType::Sampler) { switch (Parameter.Type) { case FRHIShaderParameterResource::EType::Texture: { FRHITexture* Texture = static_cast(Parameter.Resource); IRMakeTextureResident(Frequency, GetMetalSurfaceFromRHITexture(Texture)->Texture.get()); break; } case FRHIShaderParameterResource::EType::ResourceView: { FMetalShaderResourceView* SRV = static_cast(Parameter.Resource); IRMakeSRVResident(Frequency, SRV); break; } case FRHIShaderParameterResource::EType::UnorderedAccessView: { FMetalUnorderedAccessView* UAV = static_cast(Parameter.Resource); IRMakeUAVResident(Frequency, UAV); break; } default: checkNoEntry(); break; } } } } } void FMetalStateCache::IRBindPackedUniforms(EMetalShaderStages const Frequency, 
int32 Index, uint8 const* Bytes, const uint32 Size, FMetalBufferPtr& Buffer) { uint64 PackedUniformsVA; if(!Buffer) { Buffer = IRSideUploadToBuffer(Bytes, Size); } CacheOrSkipResourceResidencyUpdate(Buffer->GetMTLBuffer(), Frequency, true); CBVTable[Frequency][Index] = Buffer->GetGPUAddress(); } void FMetalStateCache::IRBindUniformBuffer(EMetalShaderStages const Frequency, int32 Index, FMetalUniformBuffer* UB) { FMetalBufferPtr Buffer = UB->BackingBuffer; CacheOrSkipResourceResidencyUpdate(Buffer->GetMTLBuffer(), Frequency, true); CBVTable[Frequency][Index] = Buffer->GetGPUAddress(); } #endif void FMetalStateCache::SetShaderTexture(EMetalShaderStages const Frequency, MTL::Texture* Texture, NS::UInteger const Index, MTL::ResourceUsage const Usage) { check(Frequency < EMetalShaderStages::Num); check(Index < ML_MaxTextures); #if (PLATFORM_IOS || PLATFORM_TVOS) UE_CLOG(Texture->storageMode() == MTL::StorageModeMemoryless, LogMetal, Fatal, TEXT("FATAL: Attempting to bind a memoryless texture. Stage %u Index %u Texture %s"), Frequency, Index, *NSStringToFString(Texture->description())); #endif if (ShaderTextures[Frequency].Textures[Index] != Texture || ShaderTextures[Frequency].Usage[Index] != Usage) { ShaderTextures[Frequency].Textures[Index] = Texture; ShaderTextures[Frequency].Usage[Index] = Usage; if (Texture) { ShaderTextures[Frequency].Bound |= (FMetalTextureMask(1) << FMetalTextureMask(Index)); } else { ShaderTextures[Frequency].Bound &= ~(FMetalTextureMask(1) << FMetalTextureMask(Index)); } } } void FMetalStateCache::SetShaderSamplerState(EMetalShaderStages const Frequency, FMetalSamplerState* const Sampler, NS::UInteger const Index) { check(Frequency < EMetalShaderStages::Num); check(Index < ML_MaxSamplers); if (ShaderSamplers[Frequency].Samplers[Index] != (Sampler ? 
Sampler->State : nullptr))
	{
		if (Sampler)
		{
#if !PLATFORM_MAC
			// Off Mac, the vertex and compute stages use the sampler's NoAnisoState variant when it has one.
			ShaderSamplers[Frequency].Samplers[Index] = ((Frequency == EMetalShaderStages::Vertex || Frequency == EMetalShaderStages::Compute) && Sampler->NoAnisoState) ? Sampler->NoAnisoState : Sampler->State;
#else
			ShaderSamplers[Frequency].Samplers[Index] = Sampler->State;
#endif
			ShaderSamplers[Frequency].Bound |= (1 << Index);
		}
		else
		{
			// Null sampler: clear the slot and its bit in the bound mask.
			ShaderSamplers[Frequency].Samplers[Index] = nullptr;
			ShaderSamplers[Frequency].Bound &= ~(1 << Index);
		}
	}
}

/**
 * Maps a cross-compiler shader stage to the matching EMetalShaderStages value.
 * Unrecognised stages hit checkNoEntry() and then fall through to the pixel-stage mapping.
 */
static EMetalShaderStages TranslateShaderStage(CrossCompiler::EShaderStage ShaderStage)
{
	switch (ShaderStage)
	{
		default:
			checkNoEntry();
			[[fallthrough]];
		case CrossCompiler::SHADER_STAGE_PIXEL : return EMetalShaderStages::Pixel;
		case CrossCompiler::SHADER_STAGE_VERTEX : return EMetalShaderStages::Vertex;
		case CrossCompiler::SHADER_STAGE_COMPUTE: return EMetalShaderStages::Compute;
#if PLATFORM_SUPPORTS_GEOMETRY_SHADERS
		case CrossCompiler::SHADER_STAGE_GEOMETRY: return EMetalShaderStages::Geometry;
#endif
#if PLATFORM_SUPPORTS_MESH_SHADERS
		case CrossCompiler::SHADER_STAGE_MESH: return EMetalShaderStages::Mesh;
		case CrossCompiler::SHADER_STAGE_AMPLIFICATION: return EMetalShaderStages::Amplification;
#endif // PLATFORM_SUPPORTS_MESH_SHADERS
	}
}

/**
 * Binds a shader resource view at BindIndex of ShaderStage, dispatching on the view's Metal type:
 * texture views go to the texture table (read | sample usage), buffer views to the buffer table (read usage).
 * A null SRV pointer is ignored.
 */
void FMetalStateCache::SetShaderResourceView(EMetalShaderStages ShaderStage, uint32 BindIndex, FMetalShaderResourceView* SRV)
{
	if (SRV)
	{
		switch (SRV->GetMetalType())
		{
			case FMetalResourceViewBase::EMetalType::Null:
				checkf(false, TEXT("Attempt to bind a null SRV."));
				break;

			case FMetalResourceViewBase::EMetalType::TextureView:
			{
				SetShaderTexture(ShaderStage, SRV->GetTextureView().get(), BindIndex, MTL::ResourceUsage(MTL::ResourceUsageRead | MTL::ResourceUsageSample));
			}
			break;

			case FMetalResourceViewBase::EMetalType::BufferView:
			{
				auto const& View = SRV->GetBufferView();
				SetShaderBuffer(ShaderStage, View.Buffer, nullptr, View.Offset, View.Size, BindIndex, MTL::ResourceUsageRead);
			}
			break;

#if METAL_RHI_RAYTRACING
			case
FMetalResourceViewBase::EMetalType::AccelerationStructure: SetShaderBuffer(ShaderStage, Buffer->GetAccelerationStructure(), BindIndex, SRV->ReferencedResources); break; #endif } } } void FMetalStateCache::SetShaderUnorderedAccessView(EMetalShaderStages ShaderStage, uint32 BindIndex, FMetalUnorderedAccessView* UAV) { if (UAV) { MTL::ResourceUsage const Usage = MTL::ResourceUsage(MTL::ResourceUsageRead | MTL::ResourceUsageWrite); switch (UAV->GetMetalType()) { case FMetalResourceViewBase::EMetalType::Null: checkf(false, TEXT("Attempt to bind a null UAV.")); break; case FMetalResourceViewBase::EMetalType::TextureView: { SetShaderTexture(ShaderStage, UAV->GetTextureView().get(), BindIndex, Usage); } break; case FMetalResourceViewBase::EMetalType::BufferView: { auto const& View = UAV->GetBufferView(); SetShaderBuffer(ShaderStage, View.Buffer, nullptr, View.Offset, View.Size, BindIndex, Usage); } break; case FMetalResourceViewBase::EMetalType::TextureBufferBacked: { auto const& View = UAV->GetTextureBufferBacked(); uint32 BytesPerRow = View.Texture->bufferBytesPerRow(); uint32 ElementsPerRow = BytesPerRow / GPixelFormats[View.Format].BlockBytes; SetShaderBuffer(ShaderStage, View.Buffer, nullptr, View.Offset, View.Size, BindIndex, Usage, static_cast(View.Format), ElementsPerRow); SetShaderTexture(ShaderStage, View.Texture.get(), BindIndex, Usage); } break; #if METAL_RHI_RAYTRACING case FMetalResourceViewBase::EMetalType::AccelerationStructure: checkNoEntry(); // not implemented break; #endif } if (UAV->IsTexture()) { // @todo this needs refactoring. 
FPlatformAtomics::InterlockedExchange(&ResourceCast(UAV->GetTexture())->Written, 1); } } } template void FMetalStateCache::SetResourcesFromTables(ShaderType Shader, CrossCompiler::EShaderStage ShaderStage) { checkSlow(Shader); EMetalShaderStages Frequency = TranslateShaderStage(ShaderStage); if (!Device.SupportsFeature(EMetalFeaturesIABs)) { struct FUniformResourceBinder { FMetalStateCache& StateCache; EMetalShaderStages Frequency; void SetUAV(FRHIUnorderedAccessView* UAV, uint8 Index) { #if PLATFORM_SUPPORTS_BINDLESS_RENDERING if(IsMetalBindlessEnabled()) { StateCache.IRMakeUAVResident(Frequency, static_cast(UAV)); } else #endif { StateCache.SetShaderUnorderedAccessView(Frequency, Index, static_cast(UAV)); } } void SetSRV(FRHIShaderResourceView* SRV, uint8 Index) { #if PLATFORM_SUPPORTS_BINDLESS_RENDERING if(IsMetalBindlessEnabled()) { StateCache.IRMakeSRVResident(Frequency, static_cast(SRV)); } else #endif { StateCache.SetShaderResourceView(Frequency, Index, static_cast(SRV)); } } void SetTexture(FRHITexture* Texture, uint8 Index) { #if PLATFORM_SUPPORTS_BINDLESS_RENDERING if(IsMetalBindlessEnabled()) { StateCache.IRMakeTextureResident(Frequency, GetMetalSurfaceFromRHITexture(Texture)->Texture.get()); } else #endif { StateCache.SetShaderTexture(Frequency, GetMetalSurfaceFromRHITexture(Texture)->Texture.get(), Index, MTL::ResourceUsage(MTL::ResourceUsageRead | MTL::ResourceUsageSample)); } } void SetSampler(FRHISamplerState* Sampler, uint8 Index) { if(!IsMetalBindlessEnabled()) { StateCache.SetShaderSamplerState(Frequency, static_cast(Sampler), Index); } } #if PLATFORM_SUPPORTS_BINDLESS_RENDERING void SetResourceCollection(FRHIResourceCollection* ResourceCollection, uint32 Index) { FMetalResourceCollection* MetalResourceCollection = ResourceCast(ResourceCollection); SetSRV(MetalResourceCollection->GetShaderResourceView(), Index); } #endif }; UE::RHI::Private::SetUniformBufferResourcesFromTables( FUniformResourceBinder { *this, Frequency } , *Shader , 
DirtyUniformBuffers[Frequency] , BoundUniformBuffers[Frequency] #if ENABLE_RHI_VALIDATION , nullptr /*Tracker*/ // @todo: the current structure of the Metal RHI prevents easily passing the RHI validation layer tracker here #endif ); } else { DirtyUniformBuffers[Frequency] = 0; } } #if METAL_USE_METAL_SHADER_CONVERTER FMetalBufferPtr FMetalStateCache::IRSideUploadToBuffer(void const* Content, uint64 Size) { FMetalTempAllocator* Allocator = Device.GetUniformAllocator(); FMetalBufferPtr Buffer = Allocator->Allocate(Size); memcpy((uint8_t*)Buffer->Contents(), Content, Size); return Buffer; } template void FMetalStateCache::IRBindResourcesToEncoder(ShaderType Shader, FMetalCommandEncoder* Encoder) { MTL::RenderStages RenderStage = (MTL::RenderStages)0; switch (FunctionType) { case MTL::FunctionTypeVertex: RenderStage |= MTLRenderStageVertex; break; case MTL::FunctionTypeFragment: RenderStage |= MTLRenderStageFragment; break; #if PLATFORM_SUPPORTS_MESH_SHADERS case MTL::FunctionTypeMesh: RenderStage |= MTLRenderStageMesh; break; case MTL::FunctionTypeObject: RenderStage |= MTLRenderStageObject; break; #endif default: break; }; if (!HeapsUsedByStage[Frequency].IsEmpty()) { Encoder->UseHeaps(HeapsUsedByStage[Frequency].Array(), FunctionType); } if (!ROResourcesByStage[Frequency].IsEmpty()) { Encoder->UseResources(ROResourcesByStage[Frequency].Array(), MTL::ResourceUsageRead, RenderStage); } if (!RWResourcesByStage[Frequency].IsEmpty()) { Encoder->UseResources(RWResourcesByStage[Frequency].Array(), MTL::ResourceUsage(MTL::ResourceUsageRead | MTL::ResourceUsageWrite), RenderStage); } HeapsUsedByStage[Frequency].Reset(); ROResourcesByStage[Frequency].Reset(); RWResourcesByStage[Frequency].Reset(); // Bind Standard/Sampler descriptor heaps. 
FMetalBindlessDescriptorManager* BindlessDescriptorManager = Device.GetBindlessDescriptorManager(); if(!OverriddenDescriptorHeap) { BindlessDescriptorManager->BindDescriptorHeapsToEncoder(Encoder, FunctionType, Frequency); } else { OverriddenDescriptorHeap->BindHeap(Encoder, FunctionType, kIRStandardHeapBindPoint); } // Bind CBV Table Encoder->SetShaderBytes(FunctionType, (const uint8*)CBVTable[Frequency], sizeof(uint64) * Shader->Bindings.RSNumCBVs, kIRArgumentBufferBindPoint); } void FMetalStateCache::IRMapVertexBuffers(MTL::RenderCommandEncoder* Encoder, bool bBindForMeshShaders) { #if PLATFORM_SUPPORTS_MESH_SHADERS if (bBindForMeshShaders) { Encoder->setMeshBytes(VertexBufferVAs, sizeof(VertexBufferVAs), kIRVertexBufferBindPoint); Encoder->setObjectBytes(VertexBufferVAs, sizeof(VertexBufferVAs), kIRVertexBufferBindPoint); } else #endif { Encoder->setVertexBytes(VertexBufferVAs, sizeof(VertexBufferVAs), kIRVertexBufferBindPoint); } } #endif void FMetalStateCache::CommitRenderResources(FMetalCommandEncoder* Raster) { check(IsValidRef(GraphicsPSO)); #if PLATFORM_SUPPORTS_GEOMETRY_SHADERS bool bIsUsingGeometryEmulation = IsValidRef(GraphicsPSO->GeometryShader); #endif #if PLATFORM_SUPPORTS_MESH_SHADERS if (IsValidRef(GraphicsPSO->MeshShader)) { SetResourcesFromTables(GraphicsPSO->MeshShader, CrossCompiler::SHADER_STAGE_MESH); GetShaderParameters(EMetalShaderStages::Mesh).CommitPackedGlobals(this, Raster, EMetalShaderStages::Mesh, GraphicsPSO->MeshShader->Bindings); IRBindResourcesToEncoder(GraphicsPSO->MeshShader, Raster); if (IsValidRef(GraphicsPSO->AmplificationShader)) { SetResourcesFromTables(GraphicsPSO->AmplificationShader, CrossCompiler::SHADER_STAGE_AMPLIFICATION); GetShaderParameters(EMetalShaderStages::Amplification).CommitPackedGlobals(this, Raster, EMetalShaderStages::Amplification, GraphicsPSO->AmplificationShader->Bindings); IRBindResourcesToEncoder(GraphicsPSO->AmplificationShader, Raster); } } else #endif { 
SetResourcesFromTables(GraphicsPSO->VertexShader, CrossCompiler::SHADER_STAGE_VERTEX); GetShaderParameters(EMetalShaderStages::Vertex).CommitPackedGlobals(this, Raster, EMetalShaderStages::Vertex, GraphicsPSO->VertexShader->Bindings); #if METAL_USE_METAL_SHADER_CONVERTER if(IsMetalBindlessEnabled()) { #if PLATFORM_SUPPORTS_GEOMETRY_SHADERS if (bIsUsingGeometryEmulation) { IRBindResourcesToEncoder(GraphicsPSO->VertexShader, Raster); } else #endif { IRBindResourcesToEncoder(GraphicsPSO->VertexShader, Raster); } } #endif } #if PLATFORM_SUPPORTS_GEOMETRY_SHADERS if (IsValidRef(GraphicsPSO->GeometryShader)) { SetResourcesFromTables(GraphicsPSO->GeometryShader, CrossCompiler::SHADER_STAGE_GEOMETRY); GetShaderParameters(EMetalShaderStages::Geometry).CommitPackedGlobals(this, Raster, EMetalShaderStages::Geometry, GraphicsPSO->GeometryShader->Bindings); #if METAL_USE_METAL_SHADER_CONVERTER if(IsMetalBindlessEnabled()) { IRBindResourcesToEncoder(GraphicsPSO->GeometryShader, Raster); } } #endif #endif if (IsValidRef(GraphicsPSO->PixelShader)) { SetResourcesFromTables(GraphicsPSO->PixelShader, CrossCompiler::SHADER_STAGE_PIXEL); GetShaderParameters(EMetalShaderStages::Pixel).CommitPackedGlobals(this, Raster, EMetalShaderStages::Pixel, GraphicsPSO->PixelShader->Bindings); #if METAL_USE_METAL_SHADER_CONVERTER if(IsMetalBindlessEnabled()) { IRBindResourcesToEncoder(GraphicsPSO->PixelShader, Raster); } #endif } } void FMetalStateCache::CommitComputeResources(FMetalCommandEncoder* Compute) { check(IsValidRef(ComputeShader)); SetResourcesFromTables(ComputeShader, CrossCompiler::SHADER_STAGE_COMPUTE); if(!OverriddenDescriptorHeap) { GetShaderParameters(EMetalShaderStages::Compute).CommitPackedGlobals(this, Compute, EMetalShaderStages::Compute, ComputeShader->Bindings); } #if METAL_USE_METAL_SHADER_CONVERTER if(IsMetalBindlessEnabled()) { IRBindResourcesToEncoder(ComputeShader, Compute); } #endif } void FMetalStateCache::StartRenderPass(const FRHIRenderPassInfo& InInfo, 
FMetalQueryBuffer* QueryBuffer, MTL::RenderPassDescriptor* InDesc, bool bInIsParallelContext) { bIsParallelContext = bInIsParallelContext; FRHIRenderPassInfo Info = InInfo; ERenderTargetActions DepthActions = GetDepthActions(Info.DepthStencilRenderTarget.Action); ERenderTargetActions StencilActions = GetStencilActions(Info.DepthStencilRenderTarget.Action); ERenderTargetLoadAction DepthLoadAction = GetLoadAction(DepthActions); ERenderTargetStoreAction DepthStoreAction = GetStoreAction(DepthActions); ERenderTargetLoadAction StencilLoadAction = GetLoadAction(StencilActions); ERenderTargetStoreAction StencilStoreAction = GetStoreAction(StencilActions); if (Info.DepthStencilRenderTarget.DepthStencilTarget) { if (DepthLoadAction != ERenderTargetLoadAction::EClear) { DepthLoadAction = ERenderTargetLoadAction::ELoad; } if (Info.DepthStencilRenderTarget.ExclusiveDepthStencil.IsDepthWrite()) { DepthStoreAction = ERenderTargetStoreAction::EStore; } if (StencilLoadAction != ERenderTargetLoadAction::EClear) { StencilLoadAction = ERenderTargetLoadAction::ELoad; } if (Info.DepthStencilRenderTarget.ExclusiveDepthStencil.IsStencilWrite()) { StencilStoreAction = ERenderTargetStoreAction::EStore; } DepthActions = MakeRenderTargetActions(DepthLoadAction, DepthStoreAction); StencilActions = MakeRenderTargetActions(StencilLoadAction, StencilStoreAction); Info.DepthStencilRenderTarget.Action = MakeDepthStencilTargetActions(DepthActions, StencilActions); } for (int32 RenderTargetIndex = 0; RenderTargetIndex < Info.GetNumColorRenderTargets(); RenderTargetIndex++) { FRHIRenderPassInfo::FColorEntry& RenderTargetView = Info.ColorRenderTargets[RenderTargetIndex]; ERenderTargetLoadAction LoadAction = GetLoadAction(RenderTargetView.Action); ERenderTargetStoreAction StoreAction = GetStoreAction(RenderTargetView.Action); if (LoadAction == ERenderTargetLoadAction::EClear) { StoreAction == ERenderTargetStoreAction::EStore; } else { LoadAction = ERenderTargetLoadAction::ELoad; } 
RenderTargetView.Action = MakeRenderTargetActions(LoadAction, StoreAction); } check(!bIsParallelContext || InDesc); InvalidateRenderTargets(); if(InDesc) { RenderPassDesc = InDesc; } else { SetRenderPassInfo(Info, QueryBuffer); } } void FMetalStateCache::EndRenderPass() { // If we are running if(bIsParallelContext) { RenderPassDesc = nullptr; bIsParallelContext = false; } } void FMetalStateCache::SetStateDirty(void) { RasterBits = UINT32_MAX; PipelineBits = EMetalPipelineFlagMask; for (uint32 i = 0; i < EMetalShaderStages::Num; i++) { ShaderBuffers[i].Bound = UINT32_MAX; ShaderTextures[i].Bound = FMETALTEXTUREMASK_MAX; ShaderSamplers[i].Bound = UINT16_MAX; } } void FMetalStateCache::SetShaderBufferDirty(EMetalShaderStages const Frequency, NS::UInteger const Index) { ShaderBuffers[Frequency].Bound |= (1 << Index); } void FMetalStateCache::SetRenderStoreActions(FMetalCommandEncoder& CommandEncoder, bool const bConditionalSwitch) { check(CommandEncoder.IsRenderCommandEncoderActive() || CommandEncoder.IsParallelRenderCommandEncoderActive()) { if (bConditionalSwitch) { MTL::RenderPassColorAttachmentDescriptorArray* ColorAttachments = RenderPassDesc->colorAttachments(); for (int32 RenderTargetIndex = 0; RenderTargetIndex < RenderPassInfo.GetNumColorRenderTargets(); RenderTargetIndex++) { FRHIRenderPassInfo::FColorEntry& RenderTargetView = RenderPassInfo.ColorRenderTargets[RenderTargetIndex]; if(RenderTargetView.RenderTarget != nullptr) { const bool bMultiSampled = (ColorAttachments->object(RenderTargetIndex)->texture()->sampleCount() > 1); ColorStore[RenderTargetIndex] = GetConditionalMetalRTStoreAction(Device, bMultiSampled); } } if (RenderPassInfo.DepthStencilRenderTarget.DepthStencilTarget) { const bool bMultiSampled = RenderPassDesc->depthAttachment()->texture() && (RenderPassDesc->depthAttachment()->texture()->sampleCount() > 1); DepthStore = GetConditionalMetalRTStoreAction(Device, bMultiSampled); StencilStore = GetConditionalMetalRTStoreAction(Device, false); } } 
CommandEncoder.SetRenderPassStoreActions(ColorStore, DepthStore, StencilStore); } } void FMetalStateCache::FlushVisibilityResults(FMetalCommandEncoder& CommandEncoder) { #if PLATFORM_MAC if(VisibilityResults && VisibilityResults->Buffer && VisibilityResults->Buffer->GetMTLBuffer()->storageMode() == MTL::StorageModeManaged && VisibilityWritten && CommandEncoder.IsRenderCommandEncoderActive()) { MTL::BlitCommandEncoder* Encoder = CommandEncoder.GetBlitCommandEncoder(); METAL_GPUPROFILE(FMetalProfiler::GetProfiler()->EncodeBlit(CommandEncoder.GetCommandBufferStats(), __FUNCTION__)); Encoder->synchronizeResource(VisibilityResults->Buffer->GetMTLBuffer()); VisibilityWritten = 0; } #endif } void FMetalStateCache::SetRenderState(FMetalCommandEncoder& CommandEncoder) { SCOPE_CYCLE_COUNTER(STAT_MetalSetRenderStateTime); if (RasterBits) { if (RasterBits & EMetalRenderFlagViewport) { CommandEncoder.SetViewport(Viewport, ActiveViewports); } if (RasterBits & EMetalRenderFlagFrontFacingWinding) { CommandEncoder.SetFrontFacingWinding(MTL::WindingCounterClockwise); } if (RasterBits & EMetalRenderFlagCullMode) { check(IsValidRef(RasterizerState)); CommandEncoder.SetCullMode(TranslateCullMode(RasterizerState->State.CullMode)); } if (RasterBits & EMetalRenderFlagDepthBias) { check(IsValidRef(RasterizerState)); CommandEncoder.SetDepthBias(RasterizerState->State.DepthBias, RasterizerState->State.SlopeScaleDepthBias, FLT_MAX); } if ((RasterBits & EMetalRenderFlagScissorRect)) { CommandEncoder.SetScissorRect(Scissor, ActiveScissors); } if (RasterBits & EMetalRenderFlagTriangleFillMode) { check(IsValidRef(RasterizerState)); CommandEncoder.SetTriangleFillMode(TranslateFillMode(RasterizerState->State.FillMode)); } if (RasterBits & EMetalRenderFlagBlendColor) { CommandEncoder.SetBlendColor(BlendFactor.R, BlendFactor.G, BlendFactor.B, BlendFactor.A); } if (RasterBits & EMetalRenderFlagDepthStencilState) { check(IsValidRef(DepthStencilState)); if (DepthStencilState && RenderPassDesc && 
Device.GetRuntimeDebuggingLevel() >= EMetalDebugLevelFastValidation) { METAL_FATAL_ASSERT(DepthStencilState->bIsDepthWriteEnabled == false || (RenderPassDesc->depthAttachment() && RenderPassDesc->depthAttachment()->texture()) , TEXT("Attempting to set a depth-stencil state that writes depth but no depth texture is configured!\nState: %s\nRender Pass: %s"), *NSStringToFString(DepthStencilState->State->description()), *NSStringToFString(RenderPassDesc->description())); METAL_FATAL_ASSERT(DepthStencilState->bIsStencilWriteEnabled == false || (RenderPassDesc->stencilAttachment() && RenderPassDesc->stencilAttachment()->texture()), TEXT("Attempting to set a depth-stencil state that writes stencil but no stencil texture is configured!\nState: %s\nRender Pass: %s"), *NSStringToFString(DepthStencilState->State->description()), *NSStringToFString(RenderPassDesc->description())); } CommandEncoder.SetDepthStencilState(DepthStencilState ? DepthStencilState->State : nullptr); } if (RasterBits & EMetalRenderFlagStencilReferenceValue) { CommandEncoder.SetStencilReferenceValue(StencilRef); } if (RasterBits & EMetalRenderFlagVisibilityResultMode) { CommandEncoder.SetVisibilityResultMode(VisibilityMode, VisibilityOffset); if (VisibilityMode != MTL::VisibilityResultModeDisabled) { VisibilityWritten = VisibilityOffset + FMetalQueryBufferPool::EQueryResultMaxSize; } } if (RasterBits & EMetalRenderFlagDepthClipMode) { check(IsValidRef(RasterizerState)); CommandEncoder.SetDepthClipMode(TranslateDepthClipMode(RasterizerState->State.DepthClipMode)); } RasterBits = 0; } } void FMetalStateCache::EnsureTextureAndType(EMetalShaderStages Stage, uint32 Index, const TMap& TexTypes) const { #if UE_BUILD_DEBUG || UE_BUILD_DEVELOPMENT if (ShaderTextures[Stage].Textures[Index]) { if (ShaderTextures[Stage].Textures[Index]->textureType() != TexTypes.FindRef(Index)) { ensureMsgf(0, TEXT("Mismatched texture type: EMetalShaderStages %d, Index %d, ShaderTextureType %d != TexTypes %d"), (uint32)Stage, Index, 
(uint32)ShaderTextures[Stage].Textures[Index]->textureType(), (uint32)TexTypes.FindRef(Index)); } } else { ensureMsgf(0, TEXT("NULL texture: EMetalShaderStages %d, Index %d"), (uint32)Stage, Index); } #endif } /** Validates the pipeline/binding state */ bool FMetalStateCache::ValidateFunctionBindings(FMetalShaderPipeline* Pipeline, EMetalShaderFrequency Frequency) { bool bOK = true; #if METAL_DEBUG_OPTIONS if (!Pipeline->RenderPipelineReflection) { return true; } if (Device.GetRuntimeDebuggingLevel() >= EMetalDebugLevelConditionalSubmit) { check(Pipeline); MTLRenderPipelineReflectionPtr Reflection = Pipeline->RenderPipelineReflection; check(Reflection); if (@available(macOS 13.0, iOS 16.0, *)) { NS::Array* Bindings = nullptr; switch(Frequency) { case EMetalShaderVertex: { Bindings = Reflection->vertexBindings(); break; } case EMetalShaderFragment: { Bindings = Reflection->fragmentBindings(); break; } default: check(false); break; } for (uint32 i = 0; i < Bindings->count(); i++) { MTL::Binding* Binding = (MTL::Binding*)Bindings->object(i); check(Binding); switch(Binding->type()) { case MTL::BindingTypeBuffer: { checkf(Binding->index() < ML_MaxBuffers, TEXT("Metal buffer index exceeded!")); if (Pipeline->ResourceMask[Frequency].BufferMask & (1 << Binding->index())) { if (ShaderBuffers[Frequency].Buffers[Binding->index()].Buffer == nullptr && ShaderBuffers[Frequency].Buffers[Binding->index()].Bytes == nullptr) { bOK = false; UE_LOG(LogMetal, Warning, TEXT("Unbound buffer at Metal index %u which will crash the driver: %s"), (uint32)Binding->index(), *NSStringToFString(Binding->description())); } } break; } case MTL::BindingTypeThreadgroupMemory: { break; } case MTL::BindingTypeTexture: { MTL::TextureBinding* TextureBinding = (MTL::TextureBinding*)Bindings->object(i); checkf(Binding->index() < ML_MaxTextures, TEXT("Metal texture index exceeded!")); if (ShaderTextures[Frequency].Textures[Binding->index()] == nullptr) { bOK = false; UE_LOG(LogMetal, Warning, TEXT("Unbound 
texture at Metal index %u which will crash the driver: %s"), (uint32)Binding->index(), *NSStringToFString(Binding->description())); } else if (ShaderTextures[Frequency].Textures[Binding->index()]->textureType() != TextureBinding->textureType()) { bOK = false; UE_LOG(LogMetal, Warning, TEXT("Incorrect texture type bound at Metal index %u which will crash the driver: %s\n%s"), (uint32)Binding->index(), *NSStringToFString(Binding->description()), *NSStringToFString(ShaderTextures[Frequency].Textures[Binding->index()]->description())); } break; } case MTL::BindingTypeSampler: { checkf(Binding->index() < ML_MaxSamplers, TEXT("Metal sampler index exceeded!")); if (ShaderSamplers[Frequency].Samplers[Binding->index()] == nullptr) { bOK = false; UE_LOG(LogMetal, Warning, TEXT("Unbound sampler at Metal index %u which will crash the driver: %s"), (uint32)Binding->index(), *NSStringToFString(Binding->description())); } break; } default: check(false); break; } } } else { NS::Array* Arguments = nullptr; switch(Frequency) { case EMetalShaderVertex: { Arguments = Reflection->vertexArguments(); break; } case EMetalShaderFragment: { Arguments = Reflection->fragmentArguments(); break; } default: check(false); break; } for (uint32 i = 0; i < Arguments->count(); i++) { MTL::Argument* Arg = (MTL::Argument*)Arguments->object(i); check(Arg); switch(Arg->type()) { case MTL::ArgumentTypeBuffer: { checkf(Arg->index() < ML_MaxBuffers, TEXT("Metal buffer index exceeded!")); if (NSStringToFString(Arg->name()) != TEXT("BufferSizes") && NSStringToFString(Arg->name()) != TEXT("spvBufferSizeConstants")) { if (ShaderBuffers[Frequency].Buffers[Arg->index()].Buffer == nullptr && ShaderBuffers[Frequency].Buffers[Arg->index()].Bytes == nullptr) { bOK = false; UE_LOG(LogMetal, Warning, TEXT("Unbound buffer at Metal index %u which will crash the driver: %s"), (uint32)Arg->index(), *NSStringToFString(Arg->description())); } } break; } case MTL::ArgumentTypeThreadgroupMemory: { break; } case 
MTL::ArgumentTypeTexture: { checkf(Arg->index() < ML_MaxTextures, TEXT("Metal texture index exceeded!")); if (ShaderTextures[Frequency].Textures[Arg->index()] == nullptr) { bOK = false; UE_LOG(LogMetal, Warning, TEXT("Unbound texture at Metal index %u which will crash the driver: %s"), (uint32)Arg->index(), *NSStringToFString(Arg->description())); } else if (ShaderTextures[Frequency].Textures[Arg->index()]->textureType() != Arg->textureType()) { bOK = false; UE_LOG(LogMetal, Warning, TEXT("Incorrect texture type bound at Metal index %u which will crash the driver: %s\n%s"), (uint32)Arg->index(), *NSStringToFString(Arg->description()), *NSStringToFString(ShaderTextures[Frequency].Textures[Arg->index()]->description())); } break; } case MTL::ArgumentTypeSampler: { checkf(Arg->index() < ML_MaxSamplers, TEXT("Metal sampler index exceeded!")); if (ShaderSamplers[Frequency].Samplers[Arg->index()] == nullptr) { bOK = false; UE_LOG(LogMetal, Warning, TEXT("Unbound sampler at Metal index %u which will crash the driver: %s"), (uint32)Arg->index(), *NSStringToFString(Arg->description())); } break; } default: check(false); break; } } } } #endif return bOK; } void FMetalStateCache::Validate() { #if METAL_DEBUG_OPTIONS FMetalShaderPipeline* Pipeline = GetPipelineState(); bool bOK = ValidateFunctionBindings(Pipeline, EMetalShaderVertex); if (!bOK) { UE_LOG(LogMetal, Error, TEXT("Metal Validation failures for vertex shader:\n%s"), Pipeline->VertexSource ? *NSStringToFString(Pipeline->VertexSource) : TEXT("nullptr")); } bOK = ValidateFunctionBindings(GetPipelineState(), EMetalShaderFragment); if (!bOK) { UE_LOG(LogMetal, Error, TEXT("Metal Validation failures for fragment shader:\n%s"), Pipeline->FragmentSource ? 
*NSStringToFString(Pipeline->VertexSource) : TEXT("nullptr")); } #endif } void FMetalStateCache::SetRenderPipelineState(FMetalCommandEncoder& CommandEncoder) { SCOPE_CYCLE_COUNTER(STAT_MetalSetRenderPipelineStateTime); if ((PipelineBits & EMetalPipelineFlagRasterMask) != 0) { // Some Intel drivers need RenderPipeline state to be set after DepthStencil state to work properly FMetalShaderPipeline* Pipeline = GetPipelineState(); check(Pipeline); CommandEncoder.SetRenderPipelineState(Pipeline); #if METAL_USE_METAL_SHADER_CONVERTER FMetalBindlessDescriptorManager* BindlessDescriptorManager = Device.GetBindlessDescriptorManager(); if(IsMetalBindlessEnabled()) { #if PLATFORM_SUPPORTS_MESH_SHADERS if (GraphicsPSO->VertexDeclaration != nullptr) #endif // PLATFORM_SUPPORTS_MESH_SHADERS { // Update the stride table for Vertex input (done only once as this is constant/per pipeline). uint32* InputSlotStrides = GraphicsPSO->VertexDeclaration->InputDescriptorBufferStrides.GetData(); for (uint32 i = 0; i < MaxVertexElementCount; i++) { VertexBufferVAs[i].stride = InputSlotStrides[i]; } } } #endif PipelineBits &= EMetalPipelineFlagComputeMask; } #if METAL_DEBUG_OPTIONS #if PLATFORM_SUPPORTS_BINDLESS_RENDERING FMetalBindlessDescriptorManager* BindlessDescriptorManager = Device.GetBindlessDescriptorManager(); if(!IsMetalBindlessEnabled()) #endif { Validate(); if (Device.GetRuntimeDebuggingLevel() >= EMetalDebugLevelFastValidation) { FMetalShaderPipeline* Pipeline = GetPipelineState(); EMetalShaderStages VertexStage = EMetalShaderStages::Vertex; FMetalShaderResourceMask VertexMask = Pipeline->ResourceMask[EMetalShaderVertex]; TArray& MinVertexBufferSizes = Pipeline->BufferDataSizes[EMetalShaderVertex]; const TMap& VertexTexTypes = Pipeline->TextureTypes[EMetalShaderVertex]; while(VertexMask.BufferMask) { uint32 Index = __builtin_ctz(VertexMask.BufferMask); VertexMask.BufferMask &= ~(1 << Index); if (VertexStage == EMetalShaderStages::Vertex) { FMetalBufferBinding const& Binding = 
ShaderBuffers[VertexStage].Buffers[Index]; ensure(Binding.Buffer || Binding.Bytes); ensure(MinVertexBufferSizes.Num() > Index); ensure(Binding.Length >= MinVertexBufferSizes[Index]); } } #if PLATFORM_MAC { uint64 LoTextures = (uint64)VertexMask.TextureMask; while(LoTextures) { uint32 Index = __builtin_ctzll(LoTextures); LoTextures &= ~(uint64(1) << uint64(Index)); EnsureTextureAndType(VertexStage, Index, VertexTexTypes); } uint64 HiTextures = (uint64)(VertexMask.TextureMask >> FMetalTextureMask(64)); while(HiTextures) { uint32 Index = __builtin_ctzll(HiTextures); HiTextures &= ~(uint64(1) << uint64(Index)); EnsureTextureAndType(VertexStage, Index + 64, VertexTexTypes); } } #else while(VertexMask.TextureMask) { uint32 Index = __builtin_ctz(VertexMask.TextureMask); VertexMask.TextureMask &= ~(1 << Index); EnsureTextureAndType(VertexStage, Index, VertexTexTypes); } #endif while(VertexMask.SamplerMask) { uint32 Index = __builtin_ctz(VertexMask.SamplerMask); VertexMask.SamplerMask &= ~(1 << Index); ensure(ShaderSamplers[VertexStage].Samplers[Index]); } FMetalShaderResourceMask FragmentMask = Pipeline->ResourceMask[EMetalShaderFragment]; TArray& MinFragmentBufferSizes = Pipeline->BufferDataSizes[EMetalShaderFragment]; const TMap& FragmentTexTypes = Pipeline->TextureTypes[EMetalShaderFragment]; while(FragmentMask.BufferMask) { uint32 Index = __builtin_ctz(FragmentMask.BufferMask); FragmentMask.BufferMask &= ~(1 << Index); FMetalBufferBinding const& Binding = ShaderBuffers[EMetalShaderStages::Pixel].Buffers[Index]; ensure(Binding.Buffer || Binding.Bytes); ensure(MinFragmentBufferSizes.Num() > Index); ensure(Binding.Length >= MinFragmentBufferSizes[Index]); } #if PLATFORM_MAC { uint64 LoTextures = (uint64)FragmentMask.TextureMask; while(LoTextures) { uint32 Index = __builtin_ctzll(LoTextures); LoTextures &= ~(uint64(1) << uint64(Index)); EnsureTextureAndType(EMetalShaderStages::Pixel, Index, FragmentTexTypes); } uint64 HiTextures = (uint64)(FragmentMask.TextureMask >> 
FMetalTextureMask(64)); while(HiTextures) { uint32 Index = __builtin_ctzll(HiTextures); HiTextures &= ~(uint64(1) << uint64(Index)); EnsureTextureAndType(EMetalShaderStages::Pixel, Index + 64, FragmentTexTypes); } } #else while(FragmentMask.TextureMask) { uint32 Index = __builtin_ctz(FragmentMask.TextureMask); FragmentMask.TextureMask &= ~(1 << Index); EnsureTextureAndType(EMetalShaderStages::Pixel, Index, FragmentTexTypes); } #endif while(FragmentMask.SamplerMask) { uint32 Index = __builtin_ctz(FragmentMask.SamplerMask); FragmentMask.SamplerMask &= ~(1 << Index); ensure(ShaderSamplers[EMetalShaderStages::Pixel].Samplers[Index]); } } } #endif // METAL_DEBUG_OPTIONS } void FMetalStateCache::SetComputePipelineState(FMetalCommandEncoder& CommandEncoder) { if ((PipelineBits & EMetalPipelineFlagComputeMask) != 0) { if(PreviousComputeShader != ComputeShader) { FMetalShaderPipelinePtr Pipeline = ComputeShader->GetPipeline(); check(Pipeline); CommandEncoder.SetComputePipelineState(Pipeline); PipelineBits &= EMetalPipelineFlagRasterMask; PreviousComputeShader = ComputeShader; } } if (Device.GetRuntimeDebuggingLevel() >= EMetalDebugLevelFastValidation) { FMetalShaderPipelinePtr Pipeline = ComputeShader->GetPipeline(); check(Pipeline); FMetalShaderResourceMask ComputeMask = Pipeline->ResourceMask[EMetalShaderCompute]; TArray& MinComputeBufferSizes = Pipeline->BufferDataSizes[EMetalShaderCompute]; const TMap& ComputeTexTypes = Pipeline->TextureTypes[EMetalShaderCompute]; while(ComputeMask.BufferMask) { uint32 Index = __builtin_ctz(ComputeMask.BufferMask); ComputeMask.BufferMask &= ~(1 << Index); FMetalBufferBinding const& Binding = ShaderBuffers[EMetalShaderStages::Compute].Buffers[Index]; ensure(Binding.Buffer || Binding.Bytes); ensure(MinComputeBufferSizes.Num() > Index); ensure(Binding.Length >= MinComputeBufferSizes[Index]); } #if PLATFORM_MAC { uint64 LoTextures = (uint64)ComputeMask.TextureMask; while(LoTextures) { uint32 Index = __builtin_ctzll(LoTextures); LoTextures 
&= ~(uint64(1) << uint64(Index)); EnsureTextureAndType(EMetalShaderStages::Compute, Index, ComputeTexTypes); } uint64 HiTextures = (uint64)(ComputeMask.TextureMask >> FMetalTextureMask(64)); while(HiTextures) { uint32 Index = __builtin_ctzll(HiTextures); HiTextures &= ~(uint64(1) << uint64(Index)); EnsureTextureAndType(EMetalShaderStages::Compute, Index + 64, ComputeTexTypes); } } #else while(ComputeMask.TextureMask) { uint32 Index = __builtin_ctz(ComputeMask.TextureMask); ComputeMask.TextureMask &= ~(1 << Index); EnsureTextureAndType(EMetalShaderStages::Compute, Index, ComputeTexTypes); } #endif while(ComputeMask.SamplerMask) { uint32 Index = __builtin_ctz(ComputeMask.SamplerMask); ComputeMask.SamplerMask &= ~(1 << Index); ensure(ShaderSamplers[EMetalShaderStages::Compute].Samplers[Index]); } } } void FMetalStateCache::CommitResourceTable(EMetalShaderStages const Frequency, MTL::FunctionType const Type, FMetalCommandEncoder& CommandEncoder) { FMetalBufferBindings& BufferBindings = ShaderBuffers[Frequency]; while(BufferBindings.Bound) { uint32 Index = __builtin_ctz(BufferBindings.Bound); BufferBindings.Bound &= ~(1 << Index); if (Index < ML_MaxBuffers) { FMetalBufferBinding& Binding = BufferBindings.Buffers[Index]; if (Binding.Buffer) { CommandEncoder.SetShaderBuffer(Type, Binding.Buffer, Binding.Offset, Binding.Length, Index, Binding.Usage, BufferBindings.Formats[Index], Binding.ElementRowPitch, Binding.ReferencedResources); } else if (Binding.Bytes) { CommandEncoder.SetShaderData(Type, Binding.Bytes, Binding.Offset, Index, BufferBindings.Formats[Index], Binding.ElementRowPitch); } #if METAL_RHI_RAYTRACING else if (Binding.AccelerationStructure) { CommandEncoder.SetShaderAccelerationStructure(Type, Binding.AccelerationStructure, Index); } #endif // METAL_RHI_RAYTRACING } } FMetalTextureBindings& TextureBindings = ShaderTextures[Frequency]; #if PLATFORM_MAC uint64 LoTextures = (uint64)TextureBindings.Bound; while(LoTextures) { uint32 Index = 
__builtin_ctzll(LoTextures); LoTextures &= ~(uint64(1) << uint64(Index)); if (Index < ML_MaxTextures && TextureBindings.Textures[Index]) { CommandEncoder.SetShaderTexture(Type, TextureBindings.Textures[Index], Index, TextureBindings.Usage[Index]); } } uint64 HiTextures = (uint64)(TextureBindings.Bound >> FMetalTextureMask(64)); while(HiTextures) { uint32 Index = __builtin_ctzll(HiTextures); HiTextures &= ~(uint64(1) << uint64(Index)); Index += 64; if (Index < ML_MaxTextures && TextureBindings.Textures[Index]) { CommandEncoder.SetShaderTexture(Type, TextureBindings.Textures[Index], Index, TextureBindings.Usage[Index]); } } TextureBindings.Bound = FMetalTextureMask(LoTextures) | (FMetalTextureMask(HiTextures) << FMetalTextureMask(64)); check(TextureBindings.Bound == 0); #else while(TextureBindings.Bound) { uint32 Index = __builtin_ctz(TextureBindings.Bound); TextureBindings.Bound &= ~(FMetalTextureMask(FMetalTextureMask(1) << FMetalTextureMask(Index))); if (Index < ML_MaxTextures && TextureBindings.Textures[Index]) { CommandEncoder.SetShaderTexture(Type, TextureBindings.Textures[Index], Index, TextureBindings.Usage[Index]); } } #endif FMetalSamplerBindings& SamplerBindings = ShaderSamplers[Frequency]; while(SamplerBindings.Bound) { uint32 Index = __builtin_ctz(SamplerBindings.Bound); SamplerBindings.Bound &= ~(1 << Index); if (Index < ML_MaxSamplers && SamplerBindings.Samplers[Index]) { CommandEncoder.SetShaderSamplerState(Type, SamplerBindings.Samplers[Index], Index); } } } void FMetalStateCache::DiscardRenderTargets(bool Depth, bool Stencil, uint32 ColorBitMask) { if (Depth) { switch (DepthStore) { case MTL::StoreActionUnknown: case MTL::StoreActionStore: DepthStore = MTL::StoreActionDontCare; break; case MTL::StoreActionStoreAndMultisampleResolve: DepthStore = MTL::StoreActionMultisampleResolve; break; default: break; } } if (Stencil) { StencilStore = MTL::StoreActionDontCare; } for (uint32 Index = 0; Index < MaxSimultaneousRenderTargets; ++Index) { if 
((ColorBitMask & (1u << Index)) != 0) { switch (ColorStore[Index]) { case MTL::StoreActionUnknown: case MTL::StoreActionStore: ColorStore[Index] = MTL::StoreActionDontCare; break; case MTL::StoreActionStoreAndMultisampleResolve: ColorStore[Index] = MTL::StoreActionMultisampleResolve; break; default: break; } } } } void FMetalStateCache::ReleaseDescriptor(MTL::RenderPassDescriptor* Desc) { FMetalRenderPassDescriptorPool::Get().ReleaseDescriptor(Desc); } void FMetalStateCache::ClearPreviousComputeState() { PreviousComputeShader.SafeRelease(); }