// UnrealEngine/Engine/Source/Runtime/VulkanRHI/Private/VulkanBarriers.cpp
// Copyright Epic Games, Inc. All Rights Reserved.
#include "VulkanRHIPrivate.h"
#include "VulkanBarriers.h"
#include "VulkanContext.h"
#include "VulkanPendingState.h"
#include "ProfilingDebugging/RealtimeGPUProfiler.h"
#include "RHICoreTransitions.h"
// All shader stages supported by the VK device - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, etc.
extern uint32 GVulkanDevicePipelineStageBits;
extern int32 GVulkanAllowConcurrentBuffer;
extern int32 GVulkanAllowConcurrentImage;
int32 GVulkanUseMemoryBarrierOpt = 1;
static FAutoConsoleVariableRef CVarVulkanUseMemoryBarrierOpt(
TEXT("r.Vulkan.UseMemoryBarrierOpt"),
GVulkanUseMemoryBarrierOpt,
TEXT("Simplify buffer barriers and image barriers without layout transitions to a memory barrier.\n")
TEXT(" 0: Do not collapse to a single memory barrier, useful for tracking single resource transitions in external tools\n")
TEXT(" 1: Collapse to a memory barrier when appropriate (default)"),
ECVF_Default
);
int32 GVulkanMaxBarriersPerBatch = -1;
static FAutoConsoleVariableRef CVarVulkanMaxBarriersPerBatch(
TEXT("r.Vulkan.MaxBarriersPerBatch"),
GVulkanMaxBarriersPerBatch,
TEXT("Will limit the number of barriers sent per batch\n")
TEXT(" <=0: Do not limit (default)\n")
TEXT(" >0: Limit to the specified number\n"),
ECVF_Default
);
int32 GVulkanAllowSplitBarriers = 1;
static FAutoConsoleVariableRef CVarVulkanAllowSplitBarriers(
TEXT("r.Vulkan.AllowSplitBarriers"),
GVulkanAllowSplitBarriers,
TEXT("Will limit the number of barriers sent per batch\n")
TEXT(" 0: Disable split barriers\n")
TEXT(" 1: Allow split barriers using Synchronization2 events (default)\n"),
ECVF_Default
);
//
// The following two functions are used when the RHI needs to do image layout transitions internally.
// They are not used for the transitions requested through the public API (RHICreate/Begin/EndTransition)
// unless the initial state is ERHIAccess::Unknown, in which case the tracking code kicks in.
//
static VkAccessFlags GetVkAccessMaskForLayout(const VkImageLayout Layout)
{
VkAccessFlags Flags = 0;
switch (Layout)
{
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
Flags = VK_ACCESS_TRANSFER_READ_BIT;
break;
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
Flags = VK_ACCESS_TRANSFER_WRITE_BIT;
break;
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
Flags = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
break;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL:
case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL:
Flags = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
break;
case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL:
case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL:
Flags = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
break;
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
Flags = VK_ACCESS_SHADER_READ_BIT;
break;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL:
case VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL:
Flags = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
break;
case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
Flags = 0;
break;
case VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT:
Flags = VK_ACCESS_FRAGMENT_DENSITY_MAP_READ_BIT_EXT;
break;
case VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR:
Flags = VK_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR;
break;
case VK_IMAGE_LAYOUT_GENERAL:
// todo-jn: could be used for R64 in read layout
case VK_IMAGE_LAYOUT_UNDEFINED:
Flags = 0;
break;
case VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL:
// todo-jn: sync2 currently only used by depth/stencil targets
Flags = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
break;
case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL:
// todo-jn: sync2 currently only used by depth/stencil targets
Flags = VK_ACCESS_SHADER_READ_BIT;
break;
default:
checkNoEntry();
break;
}
return Flags;
}
static VkPipelineStageFlags GetVkStageFlagsForLayout(VkImageLayout Layout)
{
VkPipelineStageFlags Flags = 0;
switch (Layout)
{
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
Flags = VK_PIPELINE_STAGE_TRANSFER_BIT;
break;
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
Flags = VK_PIPELINE_STAGE_TRANSFER_BIT;
break;
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
Flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
break;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL:
case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL:
Flags = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
break;
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
Flags = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
break;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL:
case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL:
case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL:
case VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL:
Flags = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
break;
case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
Flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
break;
case VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT:
Flags = VK_PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT_EXT;
break;
case VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR:
Flags = VK_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
break;
case VK_IMAGE_LAYOUT_GENERAL:
case VK_IMAGE_LAYOUT_UNDEFINED:
Flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
break;
case VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL:
// todo-jn: sync2 currently only used by depth/stencil targets
Flags = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
break;
case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL:
// todo-jn: sync2 currently only used by depth/stencil targets
Flags = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
break;
default:
checkNoEntry();
break;
}
return Flags;
}
//
// Get the Vulkan stage flags, access flags and image layout (if relevant) corresponding to an ERHIAccess value from the public API.
//
static void GetVkStageAndAccessFlags(ERHIAccess RHIAccess, FRHITransitionInfo::EType ResourceType, uint32 UsageFlags, bool bIsDepthStencil, bool bSupportsReadOnlyOptimal, VkPipelineStageFlags& StageFlags, VkAccessFlags& AccessFlags, VkImageLayout& Layout, bool bIsSourceState)
{
// From Vulkan's point of view, when performing a multisample resolve via a render pass attachment, resolve targets are the same as render targets.
// The caller signals this situation by setting both the RTV and ResolveDst flags, and we simply remove ResolveDst in that case,
// to treat the resource as a render target.
const ERHIAccess ResolveAttachmentAccess = (ERHIAccess)(ERHIAccess::RTV | ERHIAccess::ResolveDst);
if (RHIAccess == ResolveAttachmentAccess)
{
RHIAccess = ERHIAccess::RTV;
}
// BVHRead state may be combined with SRV, but we always treat this as just BVHRead by clearing the SRV mask
if (EnumHasAnyFlags(RHIAccess, ERHIAccess::BVHRead))
{
RHIAccess &= ~ERHIAccess::SRVMask;
}
Layout = VK_IMAGE_LAYOUT_UNDEFINED;
// The layout to use if SRV access is requested. In case of depth/stencil buffers, we don't need to worry about different states for the separate aspects, since that's handled explicitly elsewhere,
// and this function is never called for depth-only or stencil-only transitions.
const VkImageLayout SRVLayout =
bIsDepthStencil ? VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL :
bSupportsReadOnlyOptimal ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;
// States which cannot be combined.
switch (RHIAccess)
{
case ERHIAccess::Discard:
// FIXME: Align with Unknown for now, this could perhaps be less brutal
StageFlags = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
AccessFlags = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
Layout = bIsSourceState ? VK_IMAGE_LAYOUT_UNDEFINED : SRVLayout;
return;
case ERHIAccess::Unknown:
// We don't know where this is coming from, so we'll stall everything.
StageFlags = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
AccessFlags = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
return;
case ERHIAccess::CPURead:
StageFlags = VK_PIPELINE_STAGE_HOST_BIT;
AccessFlags = VK_ACCESS_HOST_READ_BIT;
Layout = VK_IMAGE_LAYOUT_GENERAL;
return;
case ERHIAccess::Present:
StageFlags = bIsSourceState ? VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT : VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
// When transitioning out of present, the semaphore handles access.
// When transitioning into present, vkQueuePresentKHR guarantees visibility.
AccessFlags = 0;
Layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
return;
case ERHIAccess::RTV:
StageFlags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
AccessFlags = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
Layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
return;
case ERHIAccess::CopyDest:
StageFlags = VK_PIPELINE_STAGE_TRANSFER_BIT;
AccessFlags = VK_ACCESS_TRANSFER_WRITE_BIT;
Layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
return;
case ERHIAccess::ResolveDst:
// Used when doing a resolve via RHICopyToResolveTarget. For us, it's the same as CopyDst.
StageFlags = VK_PIPELINE_STAGE_TRANSFER_BIT;
AccessFlags = VK_ACCESS_TRANSFER_WRITE_BIT;
Layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
return;
case ERHIAccess::BVHRead:
// vkrt todo: Finer grain stage flags would be ideal here.
StageFlags = VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
if (GRHISupportsRayTracingShaders)
{
StageFlags |= VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR;
}
AccessFlags = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
return;
case ERHIAccess::BVHWrite:
// vkrt todo: Finer grain stage flags would be ideal here.
StageFlags = VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
if (GRHISupportsRayTracingShaders)
{
StageFlags |= VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR;
}
AccessFlags = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
return;
}
// If DSVWrite is set, we ignore everything else because it decides the layout.
if (EnumHasAnyFlags(RHIAccess, ERHIAccess::DSVWrite))
{
check(bIsDepthStencil);
StageFlags = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
AccessFlags = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
Layout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL;
return;
}
// The remaining flags can be combined.
StageFlags = 0;
AccessFlags = 0;
uint32 ProcessedRHIFlags = 0;
if (EnumHasAnyFlags(RHIAccess, ERHIAccess::IndirectArgs))
{
check(ResourceType != FRHITransitionInfo::EType::Texture);
StageFlags |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
AccessFlags |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
ProcessedRHIFlags |= (uint32)ERHIAccess::IndirectArgs;
}
if (EnumHasAnyFlags(RHIAccess, ERHIAccess::VertexOrIndexBuffer))
{
check(ResourceType != FRHITransitionInfo::EType::Texture);
StageFlags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
switch (ResourceType)
{
case FRHITransitionInfo::EType::Buffer:
if ((UsageFlags & VK_BUFFER_USAGE_INDEX_BUFFER_BIT) != 0)
{
AccessFlags |= VK_ACCESS_INDEX_READ_BIT;
}
if ((UsageFlags & VK_BUFFER_USAGE_VERTEX_BUFFER_BIT) != 0)
{
AccessFlags |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
}
break;
default:
checkNoEntry();
break;
}
ProcessedRHIFlags |= (uint32)ERHIAccess::VertexOrIndexBuffer;
}
if (EnumHasAnyFlags(RHIAccess, ERHIAccess::DSVRead))
{
check(bIsDepthStencil);
StageFlags |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
AccessFlags |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
// If any of the SRV flags is set, the code below will set Layout to SRVLayout again, but it's fine since
// SRVLayout takes into account bIsDepthStencil and ends up being the same as what we set here.
Layout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL;
ProcessedRHIFlags |= (uint32)ERHIAccess::DSVRead;
}
if (EnumHasAnyFlags(RHIAccess, ERHIAccess::SRVGraphics))
{
StageFlags |= (GVulkanDevicePipelineStageBits & ~VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
AccessFlags |= VK_ACCESS_SHADER_READ_BIT;
if ((UsageFlags & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) != 0)
{
AccessFlags |= VK_ACCESS_UNIFORM_READ_BIT;
}
Layout = SRVLayout;
ProcessedRHIFlags |= (uint32)ERHIAccess::SRVGraphics;
}
if (EnumHasAnyFlags(RHIAccess, ERHIAccess::SRVCompute))
{
StageFlags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
AccessFlags |= VK_ACCESS_SHADER_READ_BIT;
// There are cases where we ping-pong images between UAVCompute and SRVCompute. In that case it may be more efficient to leave the image in VK_IMAGE_LAYOUT_GENERAL
// (at the very least, it will mean fewer image barriers). There's no good way to detect this though, so it might be better if the high level code just did UAV
// to UAV transitions in that case, instead of SRV <-> UAV.
Layout = SRVLayout;
ProcessedRHIFlags |= (uint32)ERHIAccess::SRVCompute;
}
if (EnumHasAnyFlags(RHIAccess, ERHIAccess::UAVGraphics))
{
StageFlags |= (GVulkanDevicePipelineStageBits & ~VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
AccessFlags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
Layout = VK_IMAGE_LAYOUT_GENERAL;
ProcessedRHIFlags |= (uint32)ERHIAccess::UAVGraphics;
}
if (EnumHasAnyFlags(RHIAccess, ERHIAccess::UAVCompute))
{
StageFlags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
AccessFlags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
Layout = VK_IMAGE_LAYOUT_GENERAL;
ProcessedRHIFlags |= (uint32)ERHIAccess::UAVCompute;
}
// ResolveSrc is used when doing a resolve via RHICopyToResolveTarget. For us, it's the same as CopySrc.
if (EnumHasAnyFlags(RHIAccess, ERHIAccess::CopySrc | ERHIAccess::ResolveSrc))
{
// If this is requested for a texture, behavior will depend on if we're combined with other flags
if (ResourceType == FRHITransitionInfo::EType::Texture)
{
// If no other RHIAccess is mixed in with our CopySrc, then use proper TRANSFER_SRC layout
if (Layout == VK_IMAGE_LAYOUT_UNDEFINED)
{
Layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
StageFlags = VK_PIPELINE_STAGE_TRANSFER_BIT;
AccessFlags = VK_ACCESS_TRANSFER_READ_BIT;
}
else
{
// If anything else is mixed in with the CopySrc, then go to the "catch all" GENERAL layout
Layout = VK_IMAGE_LAYOUT_GENERAL;
StageFlags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
AccessFlags |= VK_ACCESS_TRANSFER_READ_BIT;
}
}
else
{
StageFlags = VK_PIPELINE_STAGE_TRANSFER_BIT;
AccessFlags = VK_ACCESS_TRANSFER_READ_BIT;
}
ProcessedRHIFlags |= (uint32)(ERHIAccess::CopySrc | ERHIAccess::ResolveSrc);
}
if (EnumHasAnyFlags(RHIAccess, ERHIAccess::ShadingRateSource) && ValidateShadingRateDataType())
{
checkf(ResourceType == FRHITransitionInfo::EType::Texture, TEXT("A non-texture resource was tagged as a shading rate source; only textures (Texture2D and Texture2DArray) can be used for this purpose."));
if (GRHIVariableRateShadingImageDataType == VRSImage_Palette)
{
StageFlags = VK_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
AccessFlags = VK_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR;
Layout = VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR;
}
if (GRHIVariableRateShadingImageDataType == VRSImage_Fractional)
{
StageFlags = VK_PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT_EXT;
AccessFlags = VK_ACCESS_FRAGMENT_DENSITY_MAP_READ_BIT_EXT;
Layout = VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT;
}
ProcessedRHIFlags |= (uint32)ERHIAccess::ShadingRateSource;
}
uint32 RemainingFlags = (uint32)RHIAccess & (~ProcessedRHIFlags);
ensureMsgf(RemainingFlags == 0, TEXT("Some access flags were not processed. RHIAccess=%x, ProcessedRHIFlags=%x, RemainingFlags=%x"), RHIAccess, ProcessedRHIFlags, RemainingFlags);
}
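// Translates an RHI plane slice (all planes, depth, or stencil) into the corresponding Vulkan image aspect flags.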
static VkImageAspectFlags GetDepthStencilAspectMask(uint32 PlaneSlice)
{
VkImageAspectFlags AspectFlags = 0;
if (PlaneSlice == FRHISubresourceRange::kAllSubresources)
{
AspectFlags = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
}
if (PlaneSlice == FRHISubresourceRange::kDepthPlaneSlice)
{
AspectFlags = VK_IMAGE_ASPECT_DEPTH_BIT;
}
if (PlaneSlice == FRHISubresourceRange::kStencilPlaneSlice)
{
AspectFlags = VK_IMAGE_ASPECT_STENCIL_BIT;
}
return AspectFlags;
}
// Returns the VK_KHR_synchronization2 layout corresponding to an access type
VkImageLayout FVulkanPipelineBarrier::GetDepthOrStencilLayout(ERHIAccess Access)
{
VkImageLayout Layout;
if (Access == ERHIAccess::Unknown)
{
Layout = VK_IMAGE_LAYOUT_UNDEFINED;
}
else if (Access == ERHIAccess::Discard)
{
Layout = VK_IMAGE_LAYOUT_UNDEFINED;
}
else if (EnumHasAnyFlags(Access, ERHIAccess::CopySrc))
{
Layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
}
else if (EnumHasAnyFlags(Access, ERHIAccess::CopyDest))
{
Layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
}
else if (EnumHasAnyFlags(Access, ERHIAccess::DSVWrite))
{
Layout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL;
}
else
{
Layout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL;
}
return Layout;
}
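// Computes the stage and access flags for a depth/stencil plane transition. The layout is resolved separately
// through GetDepthOrStencilLayout, so this only accumulates synchronization flags.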
static void GetDepthOrStencilStageAndAccessFlags(ERHIAccess Access, VkPipelineStageFlags& StageFlags, VkAccessFlags& AccessFlags)
{
if (Access == ERHIAccess::Unknown || Access == ERHIAccess::Discard)
{
StageFlags = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
AccessFlags = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
return;
}
StageFlags = 0;
AccessFlags = 0;
uint32 ProcessedRHIFlags = 0;
if (EnumHasAllFlags(Access, ERHIAccess::ResolveDst))
{
// Despite being a depth/stencil target, resolve operations are part of the color attachment output stage
StageFlags |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
AccessFlags |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
ProcessedRHIFlags |= (uint32)ERHIAccess::ResolveDst;
}
if (EnumHasAnyFlags(Access, ERHIAccess::DSVWrite))
{
StageFlags |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
AccessFlags |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
ProcessedRHIFlags |= (uint32)ERHIAccess::DSVWrite;
}
if (EnumHasAnyFlags(Access, ERHIAccess::DSVRead))
{
StageFlags |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
AccessFlags |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
ProcessedRHIFlags |= (uint32)ERHIAccess::DSVRead;
}
if (EnumHasAnyFlags(Access, ERHIAccess::SRVGraphics))
{
StageFlags |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
AccessFlags |= VK_ACCESS_SHADER_READ_BIT;
ProcessedRHIFlags |= (uint32)ERHIAccess::SRVGraphics;
}
if (EnumHasAnyFlags(Access, ERHIAccess::UAVGraphics))
{
StageFlags |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
AccessFlags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
ProcessedRHIFlags |= (uint32)ERHIAccess::UAVGraphics;
}
if (EnumHasAnyFlags(Access, ERHIAccess::SRVCompute))
{
StageFlags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
AccessFlags |= VK_ACCESS_SHADER_READ_BIT;
ProcessedRHIFlags |= (uint32)ERHIAccess::SRVCompute;
}
if (EnumHasAnyFlags(Access, ERHIAccess::UAVCompute))
{
StageFlags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
AccessFlags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
ProcessedRHIFlags |= (uint32)ERHIAccess::UAVCompute;
}
if (EnumHasAnyFlags(Access, ERHIAccess::CopySrc))
{
StageFlags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
AccessFlags |= VK_ACCESS_TRANSFER_READ_BIT;
ProcessedRHIFlags |= (uint32)ERHIAccess::CopySrc;
}
if (EnumHasAnyFlags(Access, ERHIAccess::CopyDest))
{
StageFlags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
AccessFlags |= VK_ACCESS_TRANSFER_WRITE_BIT;
ProcessedRHIFlags |= (uint32)ERHIAccess::CopyDest;
}
const uint32 RemainingFlags = (uint32)Access & (~ProcessedRHIFlags);
ensureMsgf(RemainingFlags == 0, TEXT("Some access flags were not processed. Access=%x, ProcessedRHIFlags=%x, RemainingFlags=%x"), Access, ProcessedRHIFlags, RemainingFlags);
}
//
// Helpers for filling in the fields of a VkImageMemoryBarrier structure.
//
static void SetupImageBarrier(VkImageMemoryBarrier2& ImgBarrier, VkImage Image, VkPipelineStageFlags SrcStageFlags, VkPipelineStageFlags DstStageFlags,
VkAccessFlags SrcAccessFlags, VkAccessFlags DstAccessFlags, VkImageLayout SrcLayout, VkImageLayout DstLayout, const VkImageSubresourceRange& SubresRange)
{
ImgBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2;
ImgBarrier.pNext = nullptr;
ImgBarrier.srcStageMask = SrcStageFlags;
ImgBarrier.dstStageMask = DstStageFlags;
ImgBarrier.srcAccessMask = SrcAccessFlags;
ImgBarrier.dstAccessMask = DstAccessFlags;
ImgBarrier.oldLayout = SrcLayout;
ImgBarrier.newLayout = DstLayout;
ImgBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
ImgBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
ImgBarrier.image = Image;
ImgBarrier.subresourceRange = SubresRange;
}
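// Same as SetupImageBarrier, but covers every mip level and array layer of the image.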
static void SetupImageBarrierEntireRes(VkImageMemoryBarrier2& ImgBarrier, VkImage Image, VkPipelineStageFlags SrcStageFlags, VkPipelineStageFlags DstStageFlags,
VkAccessFlags SrcAccessFlags, VkAccessFlags DstAccessFlags, VkImageLayout SrcLayout, VkImageLayout DstLayout, VkImageAspectFlags AspectMask)
{
VkImageSubresourceRange SubresRange;
SubresRange.aspectMask = AspectMask;
SubresRange.baseMipLevel = 0;
SubresRange.levelCount = VK_REMAINING_MIP_LEVELS;
SubresRange.baseArrayLayer = 0;
SubresRange.layerCount = VK_REMAINING_ARRAY_LAYERS;
SetupImageBarrier(ImgBarrier, Image, SrcStageFlags, DstStageFlags, SrcAccessFlags, DstAccessFlags, SrcLayout, DstLayout, SubresRange);
}
// Fill in a VkImageSubresourceRange struct from the data contained inside a transition info struct coming from the public API.
static void SetupSubresourceRange(VkImageSubresourceRange& SubresRange, const FRHITransitionInfo& TransitionInfo, VkImageAspectFlags AspectMask)
{
SubresRange.aspectMask = AspectMask;
if (TransitionInfo.IsAllMips())
{
SubresRange.baseMipLevel = 0;
SubresRange.levelCount = VK_REMAINING_MIP_LEVELS;
}
else
{
SubresRange.baseMipLevel = TransitionInfo.MipIndex;
SubresRange.levelCount = 1;
}
if (TransitionInfo.IsAllArraySlices())
{
SubresRange.baseArrayLayer = 0;
SubresRange.layerCount = VK_REMAINING_ARRAY_LAYERS;
}
else
{
SubresRange.baseArrayLayer = TransitionInfo.ArraySlice;
SubresRange.layerCount = 1;
}
}
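// Creates the backend data for an RHI transition: stores the transition infos, allocates a cross-queue semaphore
// when the source and destination pipelines differ, and reserves a Synchronization2 event when a same-queue
// split barrier can be used.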
void FVulkanDynamicRHI::RHICreateTransition(FRHITransition* Transition, const FRHITransitionCreateInfo& CreateInfo)
{
TRACE_CPUPROFILER_EVENT_SCOPE(RHICreateTransition);
const ERHIPipeline SrcPipelines = CreateInfo.SrcPipelines;
const ERHIPipeline DstPipelines = CreateInfo.DstPipelines;
FVulkanTransitionData* Data = new (Transition->GetPrivateData<FVulkanTransitionData>()) FVulkanTransitionData;
Data->TransitionInfos = CreateInfo.TransitionInfos;
Data->SrcPipelines = SrcPipelines;
Data->DstPipelines = DstPipelines;
if ((SrcPipelines != DstPipelines) && !EnumHasAnyFlags(CreateInfo.Flags, ERHITransitionCreateFlags::NoFence))
{
Data->Semaphore = new VulkanRHI::FSemaphore(*Device);
}
// If we're staying on the same queue, use split barriers if they are permitted and supported
if (GVulkanAllowSplitBarriers && (GVulkanMaxBarriersPerBatch <= 0) && Device->SupportsParallelRendering() &&
(SrcPipelines == DstPipelines) && !EnumHasAnyFlags(CreateInfo.Flags, ERHITransitionCreateFlags::NoSplit))
{
// Track if host stage is used, it will prevent using split barrier
bool bIncludesHostSync = false;
for (const FRHITransitionInfo& Info : Data->TransitionInfos)
{
if (EnumHasAnyFlags(Info.AccessBefore, ERHIAccess::CPURead) || EnumHasAnyFlags(Info.AccessAfter, ERHIAccess::CPURead))
{
bIncludesHostSync = true;
break;
}
}
// Create an event for the split barriers
Data->EventHandle = !bIncludesHostSync ? Device->GetBarrierEvent() : VK_NULL_HANDLE;
}
}
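// Destroys the per-transition data allocated in RHICreateTransition.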
void FVulkanDynamicRHI::RHIReleaseTransition(FRHITransition* Transition)
{
Transition->GetPrivateData<FVulkanTransitionData>()->~FVulkanTransitionData();
}
// We need to keep the texture pointers around, because we need to call OnTransitionResource on them, and we need mip and layer counts for the tracking code.
struct FImageBarrierExtraData
{
FVulkanTexture* BaseTexture = nullptr;
bool IsAliasingBarrier = false;
uint8 PlaneSlice = 0;
ERHIAccess PlaneAccess = ERHIAccess::Unknown;
};
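// Barrier batch for the legacy (non-Synchronization2) path. vkCmdPipelineBarrier takes a single pair of stage masks,
// so the source and destination stages are merged across all barriers in the batch.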
struct FLegacyBarrierArrays
{
TArray<VkMemoryBarrier, TInlineAllocator<1>> MemoryBarriers;
TArray<VkBufferMemoryBarrier> BufferBarriers;
TArray<VkImageMemoryBarrier> ImageBarriers;
TArray<FImageBarrierExtraData> ImageExtraData;
VkPipelineStageFlags SrcStageMask = 0;
VkPipelineStageFlags DstStageMask = 0;
};
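// Barrier batch for the VK_KHR_synchronization2 path, where each barrier carries its own stage masks.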
struct FSync2BarrierArrays
{
TArray<VkMemoryBarrier2, TInlineAllocator<1>> MemoryBarriers;
TArray<VkBufferMemoryBarrier2> BufferBarriers;
TArray<VkImageMemoryBarrier2> ImageBarriers;
};
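// Converts the transitions stored in FVulkanTransitionData into Vulkan barrier structs, templated so the same logic
// can fill either the legacy or the Synchronization2 arrays. A first pass counts textures and buffers to presize the
// arrays; a second pass fills in the barriers, collapsing same-queue barriers without layout changes into a single
// global memory barrier when r.Vulkan.UseMemoryBarrierOpt allows it.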
template<typename BarrierArrayType>
void ConvertTransitionToBarriers(FVulkanCommandListContext& Context, const FVulkanTransitionData& Data, BarrierArrayType& OutBarriers)
{
using MemoryBarrierType = typename decltype(OutBarriers.MemoryBarriers)::ElementType;
using BufferBarrierType = typename decltype(OutBarriers.BufferBarriers)::ElementType;
using ImageBarrierType = typename decltype(OutBarriers.ImageBarriers)::ElementType;
// Legacy barriers use a single stage for all barriers
constexpr bool bIsLegacyBarrier = std::is_same_v<FLegacyBarrierArrays, BarrierArrayType>;
// Count the images and buffers to be able to pre-allocate the arrays.
int32 NumTextures = 0, NumBuffers = 0;
for (const FRHITransitionInfo& Info : Data.TransitionInfos)
{
if (!Info.Resource)
{
continue;
}
if (EnumHasAnyFlags(Info.AccessAfter, ERHIAccess::Discard))
{
// Discard as a destination is a no-op
continue;
}
if (Info.Type == FRHITransitionInfo::EType::Texture)
{
// CPU accessible "textures" are implemented as buffers. Check if this is a real texture or a buffer.
FVulkanTexture* Texture = ResourceCast(Info.Texture);
if (Texture->GetCpuReadbackBuffer() == nullptr)
{
++NumTextures;
}
continue;
}
if (Info.Type == FRHITransitionInfo::EType::UAV)
{
FVulkanUnorderedAccessView* UAV = ResourceCast(Info.UAV);
if (UAV->IsTexture())
{
++NumTextures;
continue;
}
}
if (Data.SrcPipelines != Data.DstPipelines)
{
++NumBuffers;
}
}
// Presize all the arrays
if (!GVulkanUseMemoryBarrierOpt)
{
OutBarriers.BufferBarriers.Reserve(OutBarriers.BufferBarriers.Num() + NumBuffers);
}
OutBarriers.ImageBarriers.Reserve(OutBarriers.ImageBarriers.Num() + NumTextures);
const ERHIAccess DepthStencilFlags = ERHIAccess::DSVRead | ERHIAccess::DSVWrite;
for (const FRHITransitionInfo& Info : Data.TransitionInfos)
{
if (!Info.Resource)
{
continue;
}
if (Info.AccessAfter == ERHIAccess::Discard)
{
// Discard as a destination is a no-op
continue;
}
const UE::RHICore::FResourceState ResourceState(Context, Data.SrcPipelines, Data.DstPipelines, Info);
FVulkanBuffer* Buffer = nullptr;
FVulkanTexture* Texture = nullptr;
FRHITransitionInfo::EType UnderlyingType = Info.Type;
uint32 UsageFlags = 0;
switch (Info.Type)
{
case FRHITransitionInfo::EType::Texture:
{
Texture = ResourceCast(Info.Texture);
if (Texture->GetCpuReadbackBuffer())
{
Texture = nullptr;
}
break;
}
case FRHITransitionInfo::EType::Buffer:
{
Buffer = ResourceCast(Info.Buffer);
UsageFlags = Buffer->GetBufferUsageFlags();
break;
}
case FRHITransitionInfo::EType::UAV:
{
FVulkanUnorderedAccessView* UAV = ResourceCast(Info.UAV);
if (UAV->IsTexture())
{
Texture = ResourceCast(UAV->GetTexture());
UnderlyingType = FRHITransitionInfo::EType::Texture;
}
else
{
Buffer = ResourceCast(UAV->GetBuffer());
UnderlyingType = FRHITransitionInfo::EType::Buffer;
UsageFlags = Buffer->GetBufferUsageFlags();
}
break;
}
case FRHITransitionInfo::EType::BVH:
{
// Requires memory barrier
break;
}
default:
checkNoEntry();
continue;
}
VkPipelineStageFlags SrcStageMask, DstStageMask;
VkAccessFlags SrcAccessFlags, DstAccessFlags;
VkImageLayout SrcLayout, DstLayout;
const bool bIsDepthStencil = Texture && Texture->IsDepthOrStencilAspect();
if (bIsDepthStencil)
{
// if we use separate transitions, then just feed them in as they are
SrcLayout = FVulkanPipelineBarrier::GetDepthOrStencilLayout(ResourceState.AccessBefore);
DstLayout = FVulkanPipelineBarrier::GetDepthOrStencilLayout(ResourceState.AccessAfter);
GetDepthOrStencilStageAndAccessFlags(ResourceState.AccessBefore, SrcStageMask, SrcAccessFlags);
GetDepthOrStencilStageAndAccessFlags(ResourceState.AccessAfter, DstStageMask, DstAccessFlags);
}
else
{
const bool bSupportsReadOnlyOptimal = (Texture == nullptr) || Texture->SupportsSampling();
GetVkStageAndAccessFlags(ResourceState.AccessBefore, UnderlyingType, UsageFlags, bIsDepthStencil, bSupportsReadOnlyOptimal, SrcStageMask, SrcAccessFlags, SrcLayout, true);
GetVkStageAndAccessFlags(ResourceState.AccessAfter, UnderlyingType, UsageFlags, bIsDepthStencil, bSupportsReadOnlyOptimal, DstStageMask, DstAccessFlags, DstLayout, false);
// If the source stage isn't compute, remove vertex pipeline bits from the destination, as only compute updates vertex buffers
if (!(SrcStageMask & VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT))
{
DstStageMask &= ~(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT);
}
}
// Mash them all together for legacy barriers, set them for each barrier in sync2
if constexpr (bIsLegacyBarrier)
{
OutBarriers.SrcStageMask |= SrcStageMask;
OutBarriers.DstStageMask |= DstStageMask;
}
// If we're not transitioning across pipes and we don't need to perform layout transitions, we can express memory dependencies through a global memory barrier.
if ((Data.SrcPipelines == Data.DstPipelines) && (Texture == nullptr || (SrcLayout == DstLayout)) && GVulkanUseMemoryBarrierOpt)
{
if (OutBarriers.MemoryBarriers.Num() == 0)
{
MemoryBarrierType& NewBarrier = OutBarriers.MemoryBarriers.AddZeroed_GetRef();
NewBarrier.sType = bIsLegacyBarrier ? VK_STRUCTURE_TYPE_MEMORY_BARRIER : VK_STRUCTURE_TYPE_MEMORY_BARRIER_2;
}
const VkAccessFlags ReadMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_MEMORY_READ_BIT;
// Mash everything into a single barrier
MemoryBarrierType& MemoryBarrier = OutBarriers.MemoryBarriers[0];
// We only need a memory barrier if the previous commands wrote to the buffer. In case of a transition from read, an execution barrier is enough.
const bool SrcAccessIsRead = ((SrcAccessFlags & (~ReadMask)) == 0);
if (!SrcAccessIsRead)
{
MemoryBarrier.srcAccessMask |= SrcAccessFlags;
MemoryBarrier.dstAccessMask |= DstAccessFlags;
}
if constexpr (!bIsLegacyBarrier)
{
MemoryBarrier.srcStageMask |= SrcStageMask;
MemoryBarrier.dstStageMask |= DstStageMask;
}
}
else if (Buffer != nullptr)
{
BufferBarrierType& BufferBarrier = OutBarriers.BufferBarriers.AddZeroed_GetRef();
BufferBarrier.sType = bIsLegacyBarrier ? VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER : VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2;
BufferBarrier.srcAccessMask = SrcAccessFlags;
BufferBarrier.dstAccessMask = DstAccessFlags;
BufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
BufferBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
const VulkanRHI::FVulkanAllocation& BufferAlloc = Buffer->GetCurrentAllocation();
BufferBarrier.buffer = BufferAlloc.GetBufferHandle();
BufferBarrier.offset = BufferAlloc.Offset;
BufferBarrier.size = BufferAlloc.Size;
if constexpr (!bIsLegacyBarrier)
{
BufferBarrier.srcStageMask = SrcStageMask;
BufferBarrier.dstStageMask = DstStageMask;
}
}
else if (Texture != nullptr)
{
const VkImageAspectFlags AspectFlags = bIsDepthStencil ? GetDepthStencilAspectMask(Info.PlaneSlice) : Texture->GetFullAspectMask();
ImageBarrierType& ImageBarrier = OutBarriers.ImageBarriers.AddZeroed_GetRef();
ImageBarrier.sType = bIsLegacyBarrier ? VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER : VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2;
ImageBarrier.srcAccessMask = SrcAccessFlags;
ImageBarrier.dstAccessMask = DstAccessFlags;
ImageBarrier.oldLayout = SrcLayout;
ImageBarrier.newLayout = DstLayout;
ImageBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
ImageBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
ImageBarrier.image = Texture->Image;
SetupSubresourceRange(ImageBarrier.subresourceRange, Info, AspectFlags);
if constexpr (bIsLegacyBarrier)
{
FImageBarrierExtraData& ExtraData = OutBarriers.ImageExtraData.AddDefaulted_GetRef();;
ExtraData.BaseTexture = Texture;
ExtraData.IsAliasingBarrier = (ResourceState.AccessBefore == ERHIAccess::Discard);
if (Texture->IsDepthOrStencilAspect())
{
ExtraData.PlaneAccess = ResourceState.AccessAfter;
ExtraData.PlaneSlice = Info.PlaneSlice;
}
}
else
{
ImageBarrier.srcStageMask = SrcStageMask;
ImageBarrier.dstStageMask = DstStageMask;
}
}
else
{
checkf(false, TEXT("Transition with no resource!"));
}
}
}
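// Accumulates access masks into a merged memory barrier. A transition whose source access is read-only
// only needs an execution dependency, so it contributes no access flags.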
template <typename MemoryBarrierType>
static void MergeBarrierAccessMask(MemoryBarrierType& MemoryBarrier, VkAccessFlags InSrcAccessFlags, VkAccessFlags InDstAccessFlags)
{
const VkAccessFlags ReadMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_MEMORY_READ_BIT;
// We only need a memory barrier if the previous commands wrote to the buffer. In case of a transition from read, an execution barrier is enough.
const bool SrcAccessIsRead = ((InSrcAccessFlags & (~ReadMask)) == 0);
if (!SrcAccessIsRead)
{
MemoryBarrier.srcAccessMask |= InSrcAccessFlags;
MemoryBarrier.dstAccessMask |= InDstAccessFlags;
}
}
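// The DowngradeBarrier helpers convert Synchronization2 barrier structs into their sync1 equivalents for the legacy
// vkCmdPipelineBarrier path; the array variants also merge each barrier's stage masks into the caller's global
// source/destination masks.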
static void DowngradeBarrier(VkMemoryBarrier& OutBarrier, const VkMemoryBarrier2& InBarrier)
{
OutBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
OutBarrier.pNext = InBarrier.pNext;
OutBarrier.srcAccessMask = InBarrier.srcAccessMask;
OutBarrier.dstAccessMask = InBarrier.dstAccessMask;
}
static void DowngradeBarrier(VkBufferMemoryBarrier& OutBarrier, const VkBufferMemoryBarrier2& InBarrier)
{
OutBarrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
OutBarrier.pNext = InBarrier.pNext;
OutBarrier.srcAccessMask = InBarrier.srcAccessMask;
OutBarrier.dstAccessMask = InBarrier.dstAccessMask;
OutBarrier.srcQueueFamilyIndex = InBarrier.srcQueueFamilyIndex;
OutBarrier.dstQueueFamilyIndex = InBarrier.dstQueueFamilyIndex;
OutBarrier.buffer = InBarrier.buffer;
OutBarrier.offset = InBarrier.offset;
OutBarrier.size = InBarrier.size;
}
static void DowngradeBarrier(VkImageMemoryBarrier& OutBarrier, const VkImageMemoryBarrier2& InBarrier)
{
OutBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
OutBarrier.pNext = InBarrier.pNext;
OutBarrier.srcAccessMask = InBarrier.srcAccessMask;
OutBarrier.dstAccessMask = InBarrier.dstAccessMask;
OutBarrier.oldLayout = InBarrier.oldLayout;
OutBarrier.newLayout = InBarrier.newLayout;
OutBarrier.srcQueueFamilyIndex = InBarrier.srcQueueFamilyIndex;
OutBarrier.dstQueueFamilyIndex = InBarrier.dstQueueFamilyIndex;
OutBarrier.image = InBarrier.image;
OutBarrier.subresourceRange = InBarrier.subresourceRange;
}
template <typename DstArrayType, typename BarrierType>
static void DowngradeBarrier(DstArrayType& TargetArray, const BarrierType& SrcBarrier, VkPipelineStageFlags& MergedSrcStageMask, VkPipelineStageFlags& MergedDstStageMask)
{
auto& DstBarrier = TargetArray.AddDefaulted_GetRef();
DowngradeBarrier(DstBarrier, SrcBarrier);
MergedSrcStageMask |= SrcBarrier.srcStageMask;
MergedDstStageMask |= SrcBarrier.dstStageMask;
}
template <typename DstArrayType, typename SrcArrayType>
static void DowngradeBarrierArray(DstArrayType& TargetArray, const SrcArrayType& SrcArray, VkPipelineStageFlags& MergedSrcStageMask, VkPipelineStageFlags& MergedDstStageMask)
{
TargetArray.Reserve(TargetArray.Num() + SrcArray.Num());
for (const auto& SrcBarrier : SrcArray)
{
auto& DstBarrier = TargetArray.AddDefaulted_GetRef();
DowngradeBarrier(DstBarrier, SrcBarrier);
MergedSrcStageMask |= SrcBarrier.srcStageMask;
MergedDstStageMask |= SrcBarrier.dstStageMask;
}
}
// Legacy manual barriers issued inside the RHI through FVulkanPipelineBarrier don't have access to layout tracking, so assume the same layout for both aspects
template <typename BarrierArrayType>
static void MergeDepthStencilLayouts(BarrierArrayType& TargetArray)
{
for (auto& Barrier : TargetArray)
{
if (VKHasAnyFlags(Barrier.subresourceRange.aspectMask, (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)))
{
if (Barrier.newLayout == VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL)
{
Barrier.newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
}
else if (Barrier.newLayout == VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL)
{
Barrier.newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
}
if (Barrier.oldLayout == VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL)
{
Barrier.oldLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
}
else if (Barrier.oldLayout == VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL)
{
Barrier.oldLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
}
}
}
}
// Legacy barriers must always submit depth and stencil together
static void MergePlanes(VkImageMemoryBarrier* ImageBarriers, const FImageBarrierExtraData* RemainingExtras, int32 RemainingCount)
{
VkImageMemoryBarrier& ImageBarrier = ImageBarriers[0];
FVulkanTexture* Texture = RemainingExtras->BaseTexture;
check(Texture->Image == ImageBarrier.image);
check(ImageBarrier.newLayout != VK_IMAGE_LAYOUT_UNDEFINED);
check((ImageBarrier.oldLayout != VK_IMAGE_LAYOUT_UNDEFINED) || RemainingExtras->IsAliasingBarrier);
// For Depth/Stencil formats where only one of the aspects is transitioned, look ahead for other barriers on the same resource
if (Texture->IsDepthOrStencilAspect())
{
if (Texture->GetFullAspectMask() != ImageBarrier.subresourceRange.aspectMask)
{
check(VKHasAnyFlags(ImageBarrier.subresourceRange.aspectMask, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));
const VkImageAspectFlagBits OtherAspectMask = (VkImageAspectFlagBits)(Texture->GetFullAspectMask() ^ ImageBarrier.subresourceRange.aspectMask);
VkImageMemoryBarrier* OtherAspectImageBarrier = nullptr;
for (int32 OtherBarrierIndex = 0; OtherBarrierIndex < RemainingCount; ++OtherBarrierIndex)
{
VkImageMemoryBarrier& OtherImageBarrier = ImageBarriers[OtherBarrierIndex];
FVulkanTexture* OtherTexture = RemainingExtras[OtherBarrierIndex].BaseTexture;
if ((OtherTexture->Image == ImageBarrier.image) && (OtherImageBarrier.subresourceRange.aspectMask == OtherAspectMask))
{
check(ImageBarrier.subresourceRange.baseArrayLayer == OtherImageBarrier.subresourceRange.baseArrayLayer);
check(ImageBarrier.subresourceRange.baseMipLevel == OtherImageBarrier.subresourceRange.baseMipLevel);
OtherAspectImageBarrier = &OtherImageBarrier;
break;
}
}
VkImageLayout OtherAspectOldLayout, OtherAspectNewLayout;
if (OtherAspectImageBarrier)
{
OtherAspectOldLayout = OtherAspectImageBarrier->oldLayout;
OtherAspectNewLayout = OtherAspectImageBarrier->newLayout;
// Make it invalid so that it gets removed when we reach it
OtherAspectImageBarrier->subresourceRange.aspectMask = 0;
}
else
{
ERHIAccess OtherPlaneAccess = Texture->AllPlanesTrackedAccess[ImageBarrier.subresourceRange.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 0 : 1];
OtherAspectOldLayout = OtherAspectNewLayout = FVulkanPipelineBarrier::GetDepthOrStencilLayout(OtherPlaneAccess);
}
// Merge the layout with its other half and set it in the barrier
if (OtherAspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
{
ImageBarrier.oldLayout = VulkanRHI::GetMergedDepthStencilLayout(ImageBarrier.oldLayout, OtherAspectOldLayout);
ImageBarrier.newLayout = VulkanRHI::GetMergedDepthStencilLayout(ImageBarrier.newLayout, OtherAspectNewLayout);
}
else
{
ImageBarrier.oldLayout = VulkanRHI::GetMergedDepthStencilLayout(OtherAspectOldLayout, ImageBarrier.oldLayout);
ImageBarrier.newLayout = VulkanRHI::GetMergedDepthStencilLayout(OtherAspectNewLayout, ImageBarrier.newLayout);
}
ImageBarrier.subresourceRange.aspectMask |= OtherAspectMask;
}
else
{
// Transitions every aspect of the depth(-stencil) texture
ImageBarrier.oldLayout = VulkanRHI::GetMergedDepthStencilLayout(ImageBarrier.oldLayout, ImageBarrier.oldLayout);
ImageBarrier.newLayout = VulkanRHI::GetMergedDepthStencilLayout(ImageBarrier.newLayout, ImageBarrier.newLayout);
}
}
// Once we're done with the barrier, make sure there are no sync2 states left
check((ImageBarrier.oldLayout != VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL) && (ImageBarrier.oldLayout != VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL));
check((ImageBarrier.newLayout != VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL) && (ImageBarrier.newLayout != VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL));
}
// Create Vulkan barriers from RHI transitions when VK_KHR_Synchronization2 is NOT supported (legacy code path)
void ProcessTransitionLegacy(FVulkanCommandListContext& Context, TArrayView<const FRHITransition*>& Transitions)
{
for (const FRHITransition* Transition : Transitions)
{
const FVulkanTransitionData* Data = Transition->GetPrivateData<FVulkanTransitionData>();
const bool bIsSingleQueue = IsSingleRHIPipeline(Data->SrcPipelines) && (Data->SrcPipelines == Data->DstPipelines);
checkf(bIsSingleQueue, TEXT("Devices without support for Sync2 should not be using async compute."));
#if DO_GUARD_SLOW
checkf(EnumHasAnyFlags(Data->SrcPipelines, Context.GetPipeline()) && EnumHasAnyFlags(Data->DstPipelines, Context.GetPipeline()),
TEXT("The pipelines for this transition are [%s -> %s], but it's submitted on the [%s] queue."),
*GetRHIPipelineName(Data->SrcPipelines),
*GetRHIPipelineName(Data->DstPipelines),
*GetRHIPipelineName(Context.GetPipeline())
);
#endif
FLegacyBarrierArrays BarrierArrays;
ConvertTransitionToBarriers(Context, *Data, BarrierArrays);
// Merge any depth/stencil barriers
for (int32 Index = 0; Index < BarrierArrays.ImageExtraData.Num(); ++Index)
{
const FImageBarrierExtraData* RemainingExtras = &BarrierArrays.ImageExtraData[Index];
VkImageMemoryBarrier* RemainingBarriers = &BarrierArrays.ImageBarriers[Index];
if (RemainingExtras->BaseTexture->IsDepthOrStencilAspect())
{
RemainingExtras->BaseTexture->AllPlanesTrackedAccess[RemainingExtras->PlaneSlice] = RemainingExtras->PlaneAccess;
}
if ((RemainingBarriers->image != VK_NULL_HANDLE) && (RemainingBarriers->subresourceRange.aspectMask != 0))
{
const int32 RemainingCount = BarrierArrays.ImageExtraData.Num() - Index;
MergePlanes(RemainingBarriers, RemainingExtras, RemainingCount);
}
}
// Merging depth and stencil transitions leaves a zero aspectMask on the extra barrier, which must now be removed.
for (int32 DstIndex = 0; DstIndex < BarrierArrays.ImageBarriers.Num();)
{
if ((BarrierArrays.ImageBarriers[DstIndex].image == VK_NULL_HANDLE) || ((BarrierArrays.ImageBarriers[DstIndex].subresourceRange.aspectMask == 0)))
{
BarrierArrays.ImageBarriers.RemoveAtSwap(DstIndex);
}
else
{
++DstIndex;
}
}
// Submit
if (BarrierArrays.MemoryBarriers.Num() || BarrierArrays.BufferBarriers.Num() || BarrierArrays.ImageBarriers.Num())
{
// Submit merged stage masks with arrays of barriers
VulkanRHI::vkCmdPipelineBarrier(Context.GetCommandBuffer().GetHandle(),
BarrierArrays.SrcStageMask, BarrierArrays.DstStageMask, 0 /*VkDependencyFlags*/,
BarrierArrays.MemoryBarriers.Num(), BarrierArrays.MemoryBarriers.GetData(),
BarrierArrays.BufferBarriers.Num(), BarrierArrays.BufferBarriers.GetData(),
BarrierArrays.ImageBarriers.Num(), BarrierArrays.ImageBarriers.GetData());
}
}
}
// Removes stages/access that aren't supported by the compute queue
template<typename BarrierType>
static void MaskSupportedAsyncFlags(FVulkanDevice& Device, TArray<BarrierType>& InOutBarriers, bool bMaskSrc, bool bMaskDst)
{
const VkPipelineStageFlags SupportedComputeStageMask = Device.GetComputeQueue()->GetSupportedStageBits();
const VkAccessFlags SupportedComputeAccessMasks = Device.GetComputeQueue()->GetSupportedAccessFlags();
for (BarrierType& Barrier : InOutBarriers)
{
if (bMaskSrc)
{
Barrier.srcStageMask &= SupportedComputeStageMask;
Barrier.srcAccessMask &= SupportedComputeAccessMasks;
}
if (bMaskDst)
{
Barrier.dstStageMask &= SupportedComputeStageMask;
Barrier.dstAccessMask &= SupportedComputeAccessMasks;
}
}
}
// Patches barriers for release/acquire of resources during queue ownership transfers
template<typename BarrierType>
static void PatchCrossPipeTransitions(TArray<BarrierType>& Barriers, FVulkanCommandListContext& Context, ERHIPipeline SrcPipelines, ERHIPipeline DstPipelines, bool bIsBeginTransition)
{
const ERHIPipeline ExecutingPipeline = Context.GetPipeline();
const uint32 GraphicsFamilyIndex = Context.Device.GetGraphicsQueue()->GetFamilyIndex();
const uint32 ComputeFamilyIndex = Context.Device.GetComputeQueue()->GetFamilyIndex();
// In the case where src and dst are both single pipelines, keep the layout changes to try to do all the work in a single barrier
if (IsSingleRHIPipeline(SrcPipelines) && IsSingleRHIPipeline(DstPipelines))
{
for (int32 Index = 0; Index < Barriers.Num(); ++Index)
{
BarrierType& Barrier = Barriers[Index];
Barrier.srcQueueFamilyIndex = (SrcPipelines == ERHIPipeline::Graphics) ? GraphicsFamilyIndex : ComputeFamilyIndex;
Barrier.dstQueueFamilyIndex = (DstPipelines == ERHIPipeline::Graphics) ? GraphicsFamilyIndex : ComputeFamilyIndex;
if (bIsBeginTransition)
{
// Release
check(SrcPipelines == ExecutingPipeline);
Barrier.dstAccessMask = 0;
}
else
{
// Acquire
check(DstPipelines == ExecutingPipeline);
Barrier.srcAccessMask = 0;
}
}
}
else // Src/Dst is ERHIPipeline::All, add a single queue transfer for now :todo-jn:
{
// The cross-pipe transition will have a Graphics source when it's Graphics->All or All->AsyncCompute
const bool bSrcIsGraphics = ((SrcPipelines == ERHIPipeline::Graphics) || (DstPipelines == ERHIPipeline::AsyncCompute));
// When a transition uses ERHIPipeline::All, it might include stages/accesses that aren't supported on all queues
MaskSupportedAsyncFlags(Context.Device, Barriers, !bSrcIsGraphics, bSrcIsGraphics);
for (int32 Index = 0; Index < Barriers.Num(); ++Index)
{
BarrierType& Barrier = Barriers[Index];
// Set the queue families and filter the stages for ERHIPipeline::All running on async compute
if (bSrcIsGraphics)
{
Barrier.srcQueueFamilyIndex = GraphicsFamilyIndex;
Barrier.dstQueueFamilyIndex = ComputeFamilyIndex;
}
else
{
Barrier.srcQueueFamilyIndex = ComputeFamilyIndex;
Barrier.dstQueueFamilyIndex = GraphicsFamilyIndex;
}
// Remove the layout change, it gets submitted separately (on the single pipeline for 1..N and N..1 transitions)
if constexpr (std::is_same_v<BarrierType, VkImageMemoryBarrier2>)
{
if (IsSingleRHIPipeline(SrcPipelines))
{
Barrier.oldLayout = Barrier.newLayout;
}
else if (IsSingleRHIPipeline(DstPipelines))
{
Barrier.newLayout = Barrier.oldLayout;
}
}
if (bIsBeginTransition)
{
// Release resource from current queue.
check(EnumHasAllFlags(SrcPipelines, ExecutingPipeline));
Barrier.dstAccessMask = 0;
}
else
{
// Acquire resource on current queue.
check(EnumHasAllFlags(DstPipelines, ExecutingPipeline));
Barrier.srcAccessMask = 0;
}
}
}
}
// Splits a barrier batch into multiple vkCmdPipelineBarrier2 calls, for drivers that have issues with larger batches
template<typename BarrierType>
static void SendBatchedBarriers(VkCommandBuffer CommandBuffer, VkDependencyInfo& BatchDependencyInfo, BarrierType*& BarrierPtr, uint32_t& BarrierCountRef, int32 TotalBarrierCount)
{
for (int32 BatchStartIndex = 0; BatchStartIndex < TotalBarrierCount; BatchStartIndex += GVulkanMaxBarriersPerBatch)
{
BarrierCountRef = FMath::Min((TotalBarrierCount - BatchStartIndex), GVulkanMaxBarriersPerBatch);
VulkanRHI::vkCmdPipelineBarrier2KHR(CommandBuffer, &BatchDependencyInfo);
BarrierPtr += BarrierCountRef;
}
BarrierPtr = nullptr;
BarrierCountRef = 0;
}
// Create Vulkan barriers from RHI transitions when VK_KHR_Synchronization2 is supported
void ProcessTransitionSync2(FVulkanCommandListContext& Context, TArrayView<const FRHITransition*>& Transitions, bool bIsBeginTransition)
{
auto SubmitBarriers = [bIsBeginTransition, &Context](TArrayView<const VkMemoryBarrier2> MemoryBarriers, TArrayView<const VkBufferMemoryBarrier2> BufferBarriers, TArrayView<const VkImageMemoryBarrier2> ImageBarriers, VkEvent BarrierEvent = VK_NULL_HANDLE)
{
if (MemoryBarriers.Num() || BufferBarriers.Num() || ImageBarriers.Num())
{
VkDependencyInfo DependencyInfo;
DependencyInfo.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO;
DependencyInfo.pNext = nullptr;
DependencyInfo.dependencyFlags = 0;
DependencyInfo.memoryBarrierCount = MemoryBarriers.Num();
DependencyInfo.pMemoryBarriers = MemoryBarriers.GetData();
DependencyInfo.bufferMemoryBarrierCount = BufferBarriers.Num();
DependencyInfo.pBufferMemoryBarriers = BufferBarriers.GetData();
DependencyInfo.imageMemoryBarrierCount = ImageBarriers.Num();
DependencyInfo.pImageMemoryBarriers = ImageBarriers.GetData();
if (BarrierEvent)
{
if (bIsBeginTransition)
{
Context.GetCommandBuffer().BeginSplitBarrier(BarrierEvent, DependencyInfo);
}
else
{
Context.GetCommandBuffer().EndSplitBarrier(BarrierEvent, DependencyInfo);
}
}
else if ((GVulkanMaxBarriersPerBatch <= 0) || ((MemoryBarriers.Num() + BufferBarriers.Num() + ImageBarriers.Num()) < GVulkanMaxBarriersPerBatch))
{
VulkanRHI::vkCmdPipelineBarrier2KHR(Context.GetCommandBuffer().GetHandle(), &DependencyInfo);
}
else
{
VkDependencyInfo BatchDependencyInfo = DependencyInfo;
BatchDependencyInfo.memoryBarrierCount = 0;
BatchDependencyInfo.bufferMemoryBarrierCount = 0;
BatchDependencyInfo.imageMemoryBarrierCount = 0;
VkCommandBuffer CommandBuffer = Context.GetCommandBuffer().GetHandle();
SendBatchedBarriers(CommandBuffer, BatchDependencyInfo, BatchDependencyInfo.pMemoryBarriers, BatchDependencyInfo.memoryBarrierCount, DependencyInfo.memoryBarrierCount);
SendBatchedBarriers(CommandBuffer, BatchDependencyInfo, BatchDependencyInfo.pBufferMemoryBarriers, BatchDependencyInfo.bufferMemoryBarrierCount, DependencyInfo.bufferMemoryBarrierCount);
SendBatchedBarriers(CommandBuffer, BatchDependencyInfo, BatchDependencyInfo.pImageMemoryBarriers, BatchDependencyInfo.imageMemoryBarrierCount, DependencyInfo.imageMemoryBarrierCount);
}
}
};
TArray<VkBufferMemoryBarrier2> TempBufferBarriers;
TArray<VkImageMemoryBarrier2> TempImageBarriers;
const bool bUseOwnershipTransfers = Context.Device.HasAsyncComputeQueue() && (!GVulkanAllowConcurrentBuffer || !GVulkanAllowConcurrentImage);
for (const FRHITransition* Transition : Transitions)
{
const FVulkanTransitionData* Data = Transition->GetPrivateData<FVulkanTransitionData>();
const bool bIsSingleQueue = IsSingleRHIPipeline(Data->SrcPipelines) && (Data->SrcPipelines == Data->DstPipelines);
const ERHIPipeline TargetPipeline = bIsBeginTransition ? Data->SrcPipelines : Data->DstPipelines;
const ERHIPipeline OtherPipeline = bIsBeginTransition ? Data->DstPipelines : Data->SrcPipelines;
const ERHIPipeline ExecutingPipeline = Context.GetPipeline();
checkf(EnumHasAnyFlags(TargetPipeline, Context.GetPipeline()),
TEXT("The %s pipelines for this transition are [%s], but it's submitted on the [%s] queue."),
bIsBeginTransition ? TEXT("SRC") : TEXT("DST"),
*GetRHIPipelineName(TargetPipeline),
*GetRHIPipelineName(Context.GetPipeline())
);
// Single-queue barriers that aren't split are only submitted in EndTransition
if (bIsSingleQueue && bIsBeginTransition && !Data->EventHandle)
{
continue;
}
FSync2BarrierArrays BarrierArrays;
ConvertTransitionToBarriers(Context, *Data, BarrierArrays);
// Submit split-barriers right away (they are always single queue)
if (Data->EventHandle)
{
checkf(bIsSingleQueue, TEXT("Split barriers must remain on same queue!"));
SubmitBarriers(MakeArrayView(BarrierArrays.MemoryBarriers), MakeArrayView(BarrierArrays.BufferBarriers), MakeArrayView(BarrierArrays.ImageBarriers), Data->EventHandle);
continue;
}
// Same-queue transitions, or single-queue to single-queue transfers, can be submitted directly without copies
if (IsSingleRHIPipeline(Data->SrcPipelines) && IsSingleRHIPipeline(Data->DstPipelines))
{
// For cross-queue 1..1 transitions we will keep the layout change in the same barrier as the queue transfer
if (bUseOwnershipTransfers && (Data->SrcPipelines != Data->DstPipelines))
{
if (!GVulkanAllowConcurrentBuffer)
{
PatchCrossPipeTransitions(BarrierArrays.BufferBarriers, Context, Data->SrcPipelines, Data->DstPipelines, bIsBeginTransition);
}
if (!GVulkanAllowConcurrentImage)
{
PatchCrossPipeTransitions(BarrierArrays.ImageBarriers, Context, Data->SrcPipelines, Data->DstPipelines, bIsBeginTransition);
}
}
SubmitBarriers(MakeArrayView(BarrierArrays.MemoryBarriers), MakeArrayView(BarrierArrays.BufferBarriers), MakeArrayView(BarrierArrays.ImageBarriers), Data->EventHandle);
continue;
}
// For 1..N or N..1 transitions we submit the barriers for layout changes on the single pipeline (rest is covered by the sema)
const bool bNeedsPreLayoutChange = (bIsBeginTransition && ((IsSingleRHIPipeline(Data->SrcPipelines) && !IsSingleRHIPipeline(Data->DstPipelines) && (Data->SrcPipelines == ExecutingPipeline))));
const bool bNeedsRelease = (bIsBeginTransition && (bNeedsPreLayoutChange ||
(IsSingleRHIPipeline(Data->DstPipelines) && !IsSingleRHIPipeline(Data->SrcPipelines) && (Data->DstPipelines != ExecutingPipeline))));
const bool bNeedsPostLayoutChange = (!bIsBeginTransition && (IsSingleRHIPipeline(Data->DstPipelines) && !IsSingleRHIPipeline(Data->SrcPipelines) && (Data->DstPipelines == ExecutingPipeline)));
const bool bNeedsAcquire = (!bIsBeginTransition && (bNeedsPostLayoutChange ||
(IsSingleRHIPipeline(Data->SrcPipelines) && !IsSingleRHIPipeline(Data->DstPipelines) && (Data->SrcPipelines != ExecutingPipeline))));
// For resources without concurrent sharing mode, make copies of the array so we can:
// - wipe the layout transitions from the barriers
// - patch in the actual queue family ownership transfer
if (bUseOwnershipTransfers && (bNeedsRelease || bNeedsAcquire))
{
if (!GVulkanAllowConcurrentBuffer)
{
TempBufferBarriers = BarrierArrays.BufferBarriers;
PatchCrossPipeTransitions(TempBufferBarriers, Context, Data->SrcPipelines, Data->DstPipelines, bIsBeginTransition);
}
if (!GVulkanAllowConcurrentImage)
{
TempImageBarriers = BarrierArrays.ImageBarriers;
PatchCrossPipeTransitions(TempImageBarriers, Context, Data->SrcPipelines, Data->DstPipelines, bIsBeginTransition);
}
}
if (bNeedsPreLayoutChange)
{
// Remove unsupported flags if we're submitting on the compute queue
if (ExecutingPipeline == ERHIPipeline::AsyncCompute)
{
MaskSupportedAsyncFlags(Context.Device, BarrierArrays.BufferBarriers, false, true);
MaskSupportedAsyncFlags(Context.Device, BarrierArrays.ImageBarriers, false, true);
}
SubmitBarriers(MakeArrayView(BarrierArrays.MemoryBarriers), MakeArrayView(BarrierArrays.BufferBarriers), MakeArrayView(BarrierArrays.ImageBarriers));
}
if (TempBufferBarriers.Num() || TempImageBarriers.Num())
{
SubmitBarriers(MakeArrayView(BarrierArrays.MemoryBarriers), MakeArrayView(TempBufferBarriers), MakeArrayView(TempImageBarriers));
TempBufferBarriers.Reset();
TempImageBarriers.Reset();
}
if (bNeedsPostLayoutChange)
{
// Remove unsupported flags if we're submitting on the compute queue
if (ExecutingPipeline == ERHIPipeline::AsyncCompute)
{
MaskSupportedAsyncFlags(Context.Device, BarrierArrays.BufferBarriers, true, false);
MaskSupportedAsyncFlags(Context.Device, BarrierArrays.ImageBarriers, true, false);
}
SubmitBarriers(MakeArrayView(BarrierArrays.MemoryBarriers), MakeArrayView(BarrierArrays.BufferBarriers), MakeArrayView(BarrierArrays.ImageBarriers));
}
}
}
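// Signals or waits each transition's cross-queue semaphore on the appropriate queue: the signal is added after
// the release barriers (begin), and the wait before the acquire barriers (end).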
static void ProcessCrossQueueSemaphores(FVulkanCommandListContext& Context, TArrayView<const FRHITransition*>& Transitions, bool bIsBeginTransition)
{
for (const FRHITransition* Transition : Transitions)
{
const FVulkanTransitionData* Data = Transition->GetPrivateData<FVulkanTransitionData>();
if (Data->Semaphore)
{
check(Data->SrcPipelines != Data->DstPipelines);
if (bIsBeginTransition)
{
if ((IsSingleRHIPipeline(Data->SrcPipelines) && (Data->SrcPipelines == Context.GetPipeline())) ||
(IsSingleRHIPipeline(Data->DstPipelines) && (Data->DstPipelines != Context.GetPipeline())))
{
Context.AddSignalSemaphore(Data->Semaphore);
}
}
else
{
if ((IsSingleRHIPipeline(Data->DstPipelines) && (Data->DstPipelines == Context.GetPipeline())) ||
(IsSingleRHIPipeline(Data->SrcPipelines) && (Data->SrcPipelines != Context.GetPipeline())))
{
Context.AddWaitSemaphore(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, Data->Semaphore);
}
}
}
}
}
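// On Synchronization2-capable devices, the begin half submits release barriers (or begins split barriers) and then
// signals any cross-queue semaphores; the legacy path does all of its work in RHIEndTransitions.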
void FVulkanCommandListContext::RHIBeginTransitions(TArrayView<const FRHITransition*> Transitions)
{
if (Device.SupportsParallelRendering())
{
constexpr bool bIsBeginTransition = true;
ProcessTransitionSync2(*this, Transitions, bIsBeginTransition);
// Signal semaphores after the release barriers
ProcessCrossQueueSemaphores(*this, Transitions, bIsBeginTransition);
}
else
{
// Nothing to do for legacy barriers on begin (no split support, no async compute support)
}
}
void FVulkanCommandListContext::RHIEndTransitions(TArrayView<const FRHITransition*> Transitions)
{
if (Device.SupportsParallelRendering())
{
constexpr bool bIsBeginTransition = false;
// Wait on semaphores before the acquire barriers
ProcessCrossQueueSemaphores(*this, Transitions, bIsBeginTransition);
ProcessTransitionSync2(*this, Transitions, bIsBeginTransition);
}
else
{
ProcessTransitionLegacy(*this, Transitions);
}
}
void FVulkanPipelineBarrier::AddMemoryBarrier(VkAccessFlags InSrcAccessFlags, VkAccessFlags InDstAccessFlags, VkPipelineStageFlags InSrcStageMask, VkPipelineStageFlags InDstStageMask)
{
if (MemoryBarriers.Num() == 0)
{
VkMemoryBarrier2& NewBarrier = MemoryBarriers.AddDefaulted_GetRef();
ZeroVulkanStruct(NewBarrier, VK_STRUCTURE_TYPE_MEMORY_BARRIER_2);
}
// Mash everything into a single barrier
VkMemoryBarrier2& MemoryBarrier = MemoryBarriers[0];
MergeBarrierAccessMask(MemoryBarrier, InSrcAccessFlags, InDstAccessFlags);
MemoryBarrier.srcStageMask |= InSrcStageMask;
MemoryBarrier.dstStageMask |= InDstStageMask;
}
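// Worked example: the two calls
//   AddMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
//                    VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
//   AddMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
//                    VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT);
// leave MemoryBarriers[0] holding the OR of both calls' access and stage masks. This is
// deliberately conservative: a single coarse barrier can over-synchronize, but it is typically
// cheaper than submitting many precise ones.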
//
// Methods used when the RHI itself needs to perform a layout transition. The public API functions do not call these;
// they fill in the fields of FVulkanPipelineBarrier directly, using their own logic based on the ERHIAccess flags.
//
void FVulkanPipelineBarrier::AddFullImageLayoutTransition(const FVulkanTexture& Texture, VkImageLayout SrcLayout, VkImageLayout DstLayout)
{
const VkPipelineStageFlags SrcStageMask = GetVkStageFlagsForLayout(SrcLayout);
const VkPipelineStageFlags DstStageMask = GetVkStageFlagsForLayout(DstLayout);
const VkAccessFlags SrcAccessFlags = GetVkAccessMaskForLayout(SrcLayout);
const VkAccessFlags DstAccessFlags = GetVkAccessMaskForLayout(DstLayout);
const VkImageSubresourceRange SubresourceRange = MakeSubresourceRange(Texture.GetFullAspectMask());
if (Texture.IsDepthOrStencilAspect())
{
SrcLayout = VulkanRHI::GetMergedDepthStencilLayout(SrcLayout, SrcLayout);
DstLayout = VulkanRHI::GetMergedDepthStencilLayout(DstLayout, DstLayout);
}
VkImageMemoryBarrier2& ImgBarrier = ImageBarriers.AddDefaulted_GetRef();
SetupImageBarrier(ImgBarrier, Texture.Image, SrcStageMask, DstStageMask, SrcAccessFlags, DstAccessFlags, SrcLayout, DstLayout, SubresourceRange);
}
void FVulkanPipelineBarrier::AddImageLayoutTransition(VkImage Image, VkImageLayout SrcLayout, VkImageLayout DstLayout, const VkImageSubresourceRange& SubresourceRange)
{
const VkPipelineStageFlags SrcStageMask = GetVkStageFlagsForLayout(SrcLayout);
const VkPipelineStageFlags DstStageMask = GetVkStageFlagsForLayout(DstLayout);
const VkAccessFlags SrcAccessFlags = GetVkAccessMaskForLayout(SrcLayout);
const VkAccessFlags DstAccessFlags = GetVkAccessMaskForLayout(DstLayout);
VkImageMemoryBarrier2& ImgBarrier = ImageBarriers.AddDefaulted_GetRef();
SetupImageBarrier(ImgBarrier, Image, SrcStageMask, DstStageMask, SrcAccessFlags, DstAccessFlags, SrcLayout, DstLayout, SubresourceRange);
}
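// Usage sketch (illustrative only, not called anywhere in this file): first-use transition of a
// freshly created color image before filling it from a staging buffer. 'UploadImage' and
// 'CmdBuffer' are assumed to be a valid VkImage and FVulkanCommandBuffer*.
#if 0
FVulkanPipelineBarrier Barrier;
Barrier.AddImageLayoutTransition(UploadImage, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
	FVulkanPipelineBarrier::MakeSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT));
Barrier.Execute(CmdBuffer);
#endif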
void FVulkanPipelineBarrier::AddImageAccessTransition(const FVulkanTexture& Surface, ERHIAccess SrcAccess, ERHIAccess DstAccess, const VkImageSubresourceRange& SubresourceRange, VkImageLayout& InOutLayout)
{
// This function should only be used for known states.
check(DstAccess != ERHIAccess::Unknown);
const bool bIsDepthStencil = Surface.IsDepthOrStencilAspect();
const bool bSupportsReadOnlyOptimal = Surface.SupportsSampling();
VkPipelineStageFlags ImgSrcStage, ImgDstStage;
VkAccessFlags SrcAccessFlags, DstAccessFlags;
VkImageLayout SrcLayout, DstLayout;
GetVkStageAndAccessFlags(SrcAccess, FRHITransitionInfo::EType::Texture, 0, bIsDepthStencil, bSupportsReadOnlyOptimal, ImgSrcStage, SrcAccessFlags, SrcLayout, true);
GetVkStageAndAccessFlags(DstAccess, FRHITransitionInfo::EType::Texture, 0, bIsDepthStencil, bSupportsReadOnlyOptimal, ImgDstStage, DstAccessFlags, DstLayout, false);
// If the source stage doesn't include compute, strip the vertex pipeline stages from the destination mask: only compute can write data that those stages read
if (!(ImgSrcStage & VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT))
{
ImgDstStage &= ~(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT);
}
if (SrcLayout == VK_IMAGE_LAYOUT_UNDEFINED)
{
SrcLayout = InOutLayout;
SrcAccessFlags = GetVkAccessMaskForLayout(SrcLayout);
}
else
{
ensure(SrcLayout == InOutLayout);
}
if (DstLayout == VK_IMAGE_LAYOUT_UNDEFINED)
{
DstLayout = VK_IMAGE_LAYOUT_GENERAL;
}
VkImageMemoryBarrier2& ImgBarrier = ImageBarriers.AddDefaulted_GetRef();
SetupImageBarrier(ImgBarrier, Surface.Image, ImgSrcStage, ImgDstStage, SrcAccessFlags, DstAccessFlags, SrcLayout, DstLayout, SubresourceRange);
InOutLayout = DstLayout;
}
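// Note on unknown sources in AddImageAccessTransition: when the lookup yields
// VK_IMAGE_LAYOUT_UNDEFINED for the source (the access flags do not pin down a layout), the
// tracked InOutLayout is substituted and the source access mask is re-derived from it, so the
// transition stays valid even when the caller does not know the previous state.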
void FVulkanPipelineBarrier::Execute(VkCommandBuffer CmdBuffer)
{
if (MemoryBarriers.Num() != 0 || BufferBarriers.Num() != 0 || ImageBarriers.Num() != 0)
{
VkPipelineStageFlags SrcStageMask = 0;
VkPipelineStageFlags DstStageMask = 0;
TArray<VkMemoryBarrier, TInlineAllocator<1>> TempMemoryBarriers;
DowngradeBarrierArray(TempMemoryBarriers, MemoryBarriers, SrcStageMask, DstStageMask);
TArray<VkBufferMemoryBarrier> TempBufferBarriers;
DowngradeBarrierArray(TempBufferBarriers, BufferBarriers, SrcStageMask, DstStageMask);
TArray<VkImageMemoryBarrier, TInlineAllocator<2>> TempImageBarriers;
DowngradeBarrierArray(TempImageBarriers, ImageBarriers, SrcStageMask, DstStageMask);
MergeDepthStencilLayouts(TempImageBarriers);
VulkanRHI::vkCmdPipelineBarrier(CmdBuffer, SrcStageMask, DstStageMask, 0, TempMemoryBarriers.Num(), TempMemoryBarriers.GetData(),
TempBufferBarriers.Num(), TempBufferBarriers.GetData(), TempImageBarriers.Num(), TempImageBarriers.GetData());
}
}
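// Downgrade example: Vulkan 1.0 takes one stage mask pair for the whole vkCmdPipelineBarrier
// call, so the DowngradeBarrierArray calls above accumulate every barrier's stages into
// SrcStageMask/DstStageMask. Two barriers of (COMPUTE -> FRAGMENT) and (TRANSFER -> VERTEX_INPUT)
// thus execute as a single (COMPUTE|TRANSFER) -> (FRAGMENT|VERTEX_INPUT) dependency: correct,
// but coarser than what synchronization2 records.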
void FVulkanPipelineBarrier::Execute(FVulkanCommandBuffer* CmdBuffer)
{
if (MemoryBarriers.Num() != 0 || BufferBarriers.Num() != 0 || ImageBarriers.Num() != 0)
{
if (CmdBuffer->Device.SupportsParallelRendering())
{
VkDependencyInfo DependencyInfo;
ZeroVulkanStruct(DependencyInfo, VK_STRUCTURE_TYPE_DEPENDENCY_INFO);
DependencyInfo.memoryBarrierCount = MemoryBarriers.Num();
DependencyInfo.pMemoryBarriers = MemoryBarriers.GetData();
DependencyInfo.bufferMemoryBarrierCount = BufferBarriers.Num();
DependencyInfo.pBufferMemoryBarriers = BufferBarriers.GetData();
DependencyInfo.imageMemoryBarrierCount = ImageBarriers.Num();
DependencyInfo.pImageMemoryBarriers = ImageBarriers.GetData();
VulkanRHI::vkCmdPipelineBarrier2KHR(CmdBuffer->GetHandle(), &DependencyInfo);
}
else
{
// Call the original execute with older types
Execute(CmdBuffer->GetHandle());
}
}
}
VkImageSubresourceRange FVulkanPipelineBarrier::MakeSubresourceRange(VkImageAspectFlags AspectMask, uint32 FirstMip, uint32 NumMips, uint32 FirstLayer, uint32 NumLayers)
{
VkImageSubresourceRange Range;
Range.aspectMask = AspectMask;
Range.baseMipLevel = FirstMip;
Range.levelCount = NumMips;
Range.baseArrayLayer = FirstLayer;
Range.layerCount = NumLayers;
return Range;
}
//
// Used when we need to change the layout of a single image. Some plug-ins call this function from outside the RHI (SteamVR, at the time of writing).
//
void VulkanSetImageLayout(FVulkanCommandBuffer* CmdBuffer, VkImage Image, VkImageLayout OldLayout, VkImageLayout NewLayout, const VkImageSubresourceRange& SubresourceRange)
{
FVulkanPipelineBarrier Barrier;
Barrier.AddImageLayoutTransition(Image, OldLayout, NewLayout, SubresourceRange);
Barrier.Execute(CmdBuffer);
}
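// Example of the external use case mentioned above (sketch; acquiring the command buffer is
// engine-specific and elided, 'SwapchainImage' is an assumed handle):
#if 0
const VkImageSubresourceRange Range = FVulkanPipelineBarrier::MakeSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT);
VulkanSetImageLayout(CmdBuffer, SwapchainImage, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, Range);
#endif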
VkImageLayout FVulkanPipelineBarrier::GetDefaultLayout(const FVulkanTexture& VulkanTexture, ERHIAccess DesiredAccess)
{
switch (DesiredAccess)
{
case ERHIAccess::SRVCompute:
case ERHIAccess::SRVGraphics:
case ERHIAccess::SRVGraphicsNonPixel:
case ERHIAccess::SRVGraphicsPixel:
case ERHIAccess::SRVMask:
{
if (VulkanTexture.IsDepthOrStencilAspect())
{
if (VulkanTexture.Device->SupportsParallelRendering())
{
return VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL;
}
else
{
return VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
}
}
else
{
return VulkanTexture.SupportsSampling() ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;
}
}
case ERHIAccess::UAVCompute:
case ERHIAccess::UAVGraphics:
case ERHIAccess::UAVMask: return VK_IMAGE_LAYOUT_GENERAL;
case ERHIAccess::CopySrc: return VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
case ERHIAccess::CopyDest: return VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
case ERHIAccess::DSVRead: return VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL;
case ERHIAccess::DSVWrite: return VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL;
case ERHIAccess::ShadingRateSource:
{
if (GRHIVariableRateShadingImageDataType == VRSImage_Palette)
{
return VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR;
}
else if (GRHIVariableRateShadingImageDataType == VRSImage_Fractional)
{
return VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT;
}
}
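// Neither VRS image type matched; fall through and treat it as an error below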
default:
checkNoEntry();
return VK_IMAGE_LAYOUT_UNDEFINED;
}
}
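// Usage sketch: this resolves a coarse ERHIAccess into a concrete layout for the tracking code,
// e.g. GetDefaultLayout(Texture, ERHIAccess::SRVMask) yields SHADER_READ_ONLY_OPTIMAL for a
// sampled color texture but GENERAL when the format cannot be sampled.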