Files
UnrealEngine/Engine/Build/Android/Prebuilt/PSOService/Source/PSOService.cpp
2025-05-18 13:04:45 +08:00

1149 lines
38 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
PSOService.cpp: Vulkan PSO compilation service
=============================================================================*/
#include <jni.h>
#include <android/log.h>
#include "vulkan/vulkan.h"
#include <vector>
#include <list>
#include <string>
#include <android/sharedmem.h>
#include <sys/mman.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/resource.h>
#include <dirent.h>
// Set ENABLETRACING to 1 to wrap selected functions in ATrace sections.
// When 0, BEGIN_TRACE/END_TRACE expand to nothing.
#define ENABLETRACING 0
#if ENABLETRACING
#include <android/trace.h>
#define BEGIN_TRACE(x) ATrace_beginSection(x)
#define END_TRACE() ATrace_endSection()
#else
#define BEGIN_TRACE(x)
#define END_TRACE()
#endif
// Tag passed to __android_log_print for every message; also used as the Vulkan application name.
#define APPNAME "UEPSOService"
// Errors are always logged. The format string is the first variadic argument.
#define LOG_ERROR(...) __android_log_print(ANDROID_LOG_ERROR, APPNAME, ## __VA_ARGS__)
#ifdef NDEBUG
// Release builds: informational and verbose logging compile to nothing,
// so their arguments are never evaluated.
#define LOG_INFO(...)
#define LOG_VERBOSE(...)
#else
// Note: LOG_INFO is emitted at ANDROID_LOG_DEBUG priority, not ANDROID_LOG_INFO.
#define LOG_INFO(...) __android_log_print(ANDROID_LOG_DEBUG, APPNAME, ## __VA_ARGS__)
#define LOG_VERBOSE(...) __android_log_print(ANDROID_LOG_VERBOSE, APPNAME, ## __VA_ARGS__)
#endif
// Marks a function as exported with default visibility and C linkage (used for the Java_* entry points).
#define JNI_METHOD __attribute__ ((visibility ("default"))) extern "C"
// Debug-report callback registered by InitDevice when the Khronos validation layer is requested.
// Forwards the message text to the log and returns VK_FALSE.
VKAPI_ATTR VkBool32 VKAPI_CALL VKValidationCallback(
    VkDebugReportFlagsEXT flags,
    VkDebugReportObjectTypeEXT objectType,
    uint64_t object,
    size_t location,
    int32_t messageCode,
    const char* pLayerPrefix,
    const char* pMessage,
    void* pUserData)
{
    // Only the message text is reported; every other argument is deliberately unused.
    (void)flags;
    (void)objectType;
    (void)object;
    (void)location;
    (void)messageCode;
    (void)pLayerPrefix;
    (void)pUserData;
    (void)pMessage; // unused when LOG_INFO compiles to nothing (NDEBUG)

    LOG_INFO( "VK Validation: %s", pMessage);
    return VK_FALSE;
}
// Returns the current CLOCK_REALTIME reading as fractional seconds.
double now_s()
{
    timespec Now;
    clock_gettime(CLOCK_REALTIME, &Now);

    const double WholeSeconds = static_cast<double>(Now.tv_sec);
    const double FractionalSeconds = static_cast<double>(Now.tv_nsec) / 1e9;
    return WholeSeconds + FractionalSeconds;
}
class FVulkanPSOCompiler
{
bool bInitialized = false;
VkDevice Device = VK_NULL_HANDLE;
VkInstance Instance = VK_NULL_HANDLE;
std::vector<VkPhysicalDevice> devices;
PFN_vkCreateRenderPass2KHR vkCreateRenderPass2;
VkPipelineCache PipelineCache = VK_NULL_HANDLE;
public:
// Accessor for the single FVulkanPSOCompiler; the object is a function-local static,
// so it is constructed the first time this is called.
static FVulkanPSOCompiler& Get()
{
    static FVulkanPSOCompiler Singleton;
    FVulkanPSOCompiler& SingletonRef = Singleton;
    return SingletonRef;
}
void InitDevice(std::vector<const char*>& InstanceLayers, std::vector<const char*>& InstanceExtensions, std::vector<const char*>& DeviceExtensions)
{
if (bInitialized)
return;
BEGIN_TRACE("InitDevice");
bInitialized = true;
VkApplicationInfo appInfo{};
appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
appInfo.pApplicationName = APPNAME;
appInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0);
appInfo.pEngineName = nullptr;
appInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0);
appInfo.apiVersion = VK_API_VERSION_1_1;
VkInstanceCreateInfo createInfo{};
createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
createInfo.pApplicationInfo = &appInfo;
createInfo.enabledExtensionCount = InstanceExtensions.size();
createInfo.ppEnabledExtensionNames = InstanceExtensions.data();
createInfo.enabledLayerCount = InstanceLayers.size();
createInfo.ppEnabledLayerNames = InstanceLayers.data();
bool bEnableValidation = false;
for (uint32_t Idx; Idx < InstanceLayers.size(); ++Idx)
{
bEnableValidation = strcmp(InstanceLayers[Idx], "VK_LAYER_KHRONOS_validation") == 0;
if (bEnableValidation)
{
LOG_INFO( " VK_LAYER_KHRONOS_validation Validation Enabled");
break;
}
}
VkResult Result;
Result = vkCreateInstance(&createInfo, NULL, &Instance);
if (Result != VK_SUCCESS)
{
LOG_ERROR( " Failed to Create VKInstance %d ", Result);
exit(-1);
}
/* Load VK_EXT_debug_report entry points in debug builds */
if (bEnableValidation)
{
PFN_vkCreateDebugReportCallbackEXT vkCreateDebugReportCallbackEXT = reinterpret_cast<PFN_vkCreateDebugReportCallbackEXT>(vkGetInstanceProcAddr(Instance, "vkCreateDebugReportCallbackEXT"));
PFN_vkDebugReportMessageEXT vkDebugReportMessageEXT = reinterpret_cast<PFN_vkDebugReportMessageEXT>(vkGetInstanceProcAddr(Instance, "vkDebugReportMessageEXT"));
PFN_vkDestroyDebugReportCallbackEXT vkDestroyDebugReportCallbackEXT = reinterpret_cast<PFN_vkDestroyDebugReportCallbackEXT>(vkGetInstanceProcAddr(Instance, "vkDestroyDebugReportCallbackEXT"));
VkDebugReportCallbackCreateInfoEXT CallbackCreateInfo;
CallbackCreateInfo.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT;
CallbackCreateInfo.pNext = nullptr;
CallbackCreateInfo.flags = VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT;
CallbackCreateInfo.pfnCallback = &VKValidationCallback;
CallbackCreateInfo.pUserData = nullptr;
/* Register the callback */
VkDebugReportCallbackEXT Callback;
Result = vkCreateDebugReportCallbackEXT(Instance, &CallbackCreateInfo, nullptr, &Callback);
LOG_INFO( " Created Debug Hooks %d ", Result);
}
// Get the number of devices (GPUs) available.
uint32_t gpu_count = 0;
Result = vkEnumeratePhysicalDevices(Instance, &gpu_count, NULL);
if (Result != VK_SUCCESS)
{
LOG_ERROR( " Failed to Enumerate Physical Devices 1 %d ", Result);
exit(-1);
}
// Allocate space and get the list of devices.
devices.resize(gpu_count);
Result = vkEnumeratePhysicalDevices(Instance, &gpu_count, devices.data());
if (Result != VK_SUCCESS)
{
LOG_ERROR( " Failed to Enumerate Physical Devices 2 %d ", Result);
}
uint32_t queue_count = 0;
vkGetPhysicalDeviceQueueFamilyProperties(devices[0], &queue_count, nullptr);
std::vector<VkQueueFamilyProperties> queues(queue_count);
vkGetPhysicalDeviceQueueFamilyProperties(devices[0], &queue_count, queues.data());
uint32_t gfx_queue_idx = 0;
bool found = false;
for (unsigned int i = 0; i < queue_count; i++)
{
if (queues[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) {
gfx_queue_idx = i;
found = true;
break;
}
}
VkPhysicalDeviceFeatures PhysicalFeatures;
vkGetPhysicalDeviceFeatures(devices[0], &PhysicalFeatures);
PhysicalFeatures.shaderResourceResidency = VK_FALSE;
PhysicalFeatures.shaderResourceMinLod = VK_FALSE;
PhysicalFeatures.sparseBinding = VK_FALSE;
PhysicalFeatures.sparseResidencyBuffer = VK_FALSE;
PhysicalFeatures.sparseResidencyImage2D = VK_FALSE;
PhysicalFeatures.sparseResidencyImage3D = VK_FALSE;
PhysicalFeatures.sparseResidency2Samples = VK_FALSE;
PhysicalFeatures.sparseResidency4Samples = VK_FALSE;
PhysicalFeatures.sparseResidency8Samples = VK_FALSE;
PhysicalFeatures.sparseResidencyAliased = VK_FALSE;
VkDeviceQueueCreateInfo queueCreateInfo{};
queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queueCreateInfo.queueFamilyIndex = gfx_queue_idx;
queueCreateInfo.queueCount = queues[gfx_queue_idx].queueCount;
float* QueuePriorities = (float*)alloca(queues[gfx_queue_idx].queueCount * sizeof(float));
memset(QueuePriorities, 0, queues[gfx_queue_idx].queueCount * sizeof(float));
queueCreateInfo.pQueuePriorities = QueuePriorities;
VkDeviceCreateInfo device_info = {};
device_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
device_info.pNext = NULL;
device_info.queueCreateInfoCount = 1;
device_info.pQueueCreateInfos = &queueCreateInfo;
device_info.enabledLayerCount = 0;
device_info.ppEnabledLayerNames = NULL;
device_info.enabledExtensionCount = DeviceExtensions.size();
device_info.ppEnabledExtensionNames = DeviceExtensions.data();
device_info.pEnabledFeatures = &PhysicalFeatures;
Result = vkCreateDevice(devices[0], &device_info, NULL, &Device);
if (Result != VK_SUCCESS)
{
LOG_ERROR( " Failed to Create Device %d ", Result);
}
vkCreateRenderPass2 = (PFN_vkCreateRenderPass2KHR)vkGetDeviceProcAddr(Device, "vkCreateRenderPass2KHR");
if (vkCreateRenderPass2 == nullptr)
{
LOG_ERROR( "Failed getting pointer to vkCreateRenderPass2 ");
}
END_TRACE();
}
void ShutDownDevice()
{
bInitialized = false;
if (Device == VK_NULL_HANDLE)
{
return;
}
DestroyPipelineCache();
vkDestroyDevice(Device, nullptr);
vkDestroyInstance(Instance, nullptr);
Device = VK_NULL_HANDLE;
Instance = VK_NULL_HANDLE;
}
// Fixed-size header of a serialized graphics pipeline description.
// CompileGFXPSO fills it with a raw memcpy of sizeof(GraphicsPipelineCreateInfo) bytes from the
// PSO stream, so field order, types and padding must match whatever produced that stream.
struct GraphicsPipelineCreateInfo
{
// Copied to VkGraphicsPipelineCreateInfo::flags.
VkPipelineCreateFlags PipelineCreateFlags;
// Number of serialized VkPipelineShaderStageCreateInfo records that follow; copied to stageCount.
uint32_t StageCount;
// Each flag below says whether the corresponding state block is present in the stream.
// CompileGFXPSO reads the blocks in this order and skips any whose flag is false.
bool bHasVkPipelineVertexInputStateCreateInfo;
bool bHasVkPipelineInputAssemblyStateCreateInfo;
bool bHasVkPipelineTessellationStateCreateInfo;
bool bHasVkPipelineViewportStateCreateInfo;
bool bHasVkPipelineRasterizationStateCreateInfo;
bool bHasVkPipelineMultisampleStateCreateInfo;
bool bHasVkPipelineDepthStencilStateCreateInfo;
bool bHasVkPipelineColorBlendStateCreateInfo;
bool bHasVkPipelineDynamicStateCreateInfo;
// Copied to VkGraphicsPipelineCreateInfo::subpass.
uint32_t subpass;
};
// Copies Size bytes from Src, starting at byte index Offset, into Dst and then advances
// Offset by Size. Invoke it as a statement followed by ';'.
// The do/while(0) wrapper makes the copy and the advance a single statement, so the macro is
// safe under an unbraced if/else. Size is evaluated twice: pass side-effect-free expressions.
#define COPY_FROM_BUFFER(Dst, Src, Offset, Size) \
    do \
    { \
        memcpy((Dst), &(Src)[(Offset)], (Size)); \
        (Offset) += (Size); \
    } while (0)
// Reads a list of strings from MemoryStream at MemoryOffset and appends a pointer to each one
// to CharArray. Layout as read here: a uint32 string count, then for every string a uint32
// byte length followed by that many bytes. The pointers alias MemoryStream (nothing is copied),
// and MemoryOffset is left just past the last string.
// NOTE(review): presumably each string's bytes include a terminating NUL, since the pointers
// are later handed to APIs expecting C strings - confirm against the writer.
void BufferToCharArray(std::vector<const char*>& CharArray, const uint8_t* MemoryStream, uint32_t& MemoryOffset)
{
    uint32_t StringCount;
    memcpy(&StringCount, MemoryStream + MemoryOffset, sizeof(uint32_t));
    MemoryOffset += sizeof(uint32_t);

    for (uint32_t Remaining = StringCount; Remaining > 0; --Remaining)
    {
        uint32_t ByteLength;
        memcpy(&ByteLength, MemoryStream + MemoryOffset, sizeof(uint32_t));
        MemoryOffset += sizeof(uint32_t);

        const char* Text = reinterpret_cast<const char*>(MemoryStream + MemoryOffset);
        CharArray.push_back(Text);
        MemoryOffset += ByteLength;
    }
}
// Destroys the current pipeline cache, if there is one, and resets the handle.
void DestroyPipelineCache()
{
    if (PipelineCache == VK_NULL_HANDLE)
    {
        return;
    }

    vkDestroyPipelineCache(Device, PipelineCache, nullptr);
    PipelineCache = VK_NULL_HANDLE;
}
std::string CompileGFXPSO(const uint8_t* VS, uint64_t VSSize, const uint8_t* PS, uint64_t PSSize, const uint8_t* PSO, uint64_t PSOSize, const uint8_t* PSOCacheDataSource, uint64_t PSOCacheDataSourceSize)
{
BEGIN_TRACE("CompileGFXPSO");
std::string errorLog;
uint32_t MemoryOffset = 0;
// Read extensions and layers
std::vector<const char*> InstanceLayers;
BufferToCharArray(InstanceLayers, PSO, MemoryOffset);
std::vector<const char*> InstanceExtensions;
BufferToCharArray(InstanceExtensions, PSO, MemoryOffset);
std::vector<const char*> DeviceExtensions;
BufferToCharArray(DeviceExtensions, PSO, MemoryOffset);
InitDevice(InstanceLayers, InstanceExtensions, DeviceExtensions);
// Free PSO Cache
VkResult Result;
GraphicsPipelineCreateInfo PipelineCreateInfo;
VkGraphicsPipelineCreateInfo CreateInfo;
// clear any existing cache
DestroyPipelineCache();
//LOG_INFO( "CompileGFXPSO: VSSize %d, PSSize %d, PSOSize %d", (uint32_t)VSSize, (uint32_t)PSSize, (uint32_t)PSOSize);
// Create PSO
COPY_FROM_BUFFER(&PipelineCreateInfo, PSO, MemoryOffset, sizeof(GraphicsPipelineCreateInfo));
memset(&CreateInfo, 0, sizeof(VkGraphicsPipelineCreateInfo));
CreateInfo.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
CreateInfo.flags = PipelineCreateInfo.PipelineCreateFlags;
CreateInfo.stageCount = PipelineCreateInfo.StageCount;
CreateInfo.subpass = PipelineCreateInfo.subpass;
// FSR
bool bHasFSRCreateInfo = false;
COPY_FROM_BUFFER(&bHasFSRCreateInfo, PSO, MemoryOffset, sizeof(bool));
VkPipelineFragmentShadingRateStateCreateInfoKHR FSRCreateInfo;
if (bHasFSRCreateInfo)
{
COPY_FROM_BUFFER(&FSRCreateInfo, PSO, MemoryOffset, sizeof(VkPipelineFragmentShadingRateStateCreateInfoKHR));
FSRCreateInfo.pNext = nullptr;
CreateInfo.pNext = &FSRCreateInfo;
}
VkPipelineShaderStageCreateInfo ShaderStages[2];
// VkPipelineShaderStageCreateInfo
for (int32_t Idx = 0; Idx < PipelineCreateInfo.StageCount; ++Idx)
{
bool bHasSubGroupSizeInfo = false;
COPY_FROM_BUFFER(&bHasSubGroupSizeInfo, PSO, MemoryOffset, sizeof(bool));
void* PipelineShaderStageCreatePNext = nullptr;
if (bHasSubGroupSizeInfo)
{
PipelineShaderStageCreatePNext = (void*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkPipelineShaderStageRequiredSubgroupSizeCreateInfo);
}
COPY_FROM_BUFFER(&ShaderStages[Idx], PSO, MemoryOffset, sizeof(VkPipelineShaderStageCreateInfo));
uint32_t NameLength;
COPY_FROM_BUFFER(&NameLength, PSO, MemoryOffset, sizeof(uint32_t));
ShaderStages[Idx].pName = (const char*)&PSO[MemoryOffset];
ShaderStages[Idx].pNext = PipelineShaderStageCreatePNext;
MemoryOffset += NameLength;
}
CreateInfo.pStages = ShaderStages;
VkPipelineVertexInputStateCreateInfo VertexInputState;
if (PipelineCreateInfo.bHasVkPipelineVertexInputStateCreateInfo)
{
COPY_FROM_BUFFER(&VertexInputState, PSO, MemoryOffset, sizeof(VkPipelineVertexInputStateCreateInfo));
if (VertexInputState.vertexBindingDescriptionCount > 0)
{
uint32_t Length = VertexInputState.vertexBindingDescriptionCount * sizeof(VkVertexInputBindingDescription);
VertexInputState.pVertexBindingDescriptions = (VkVertexInputBindingDescription*)&PSO[MemoryOffset];
MemoryOffset += Length;
}
if (VertexInputState.vertexAttributeDescriptionCount > 0)
{
uint32_t Length = VertexInputState.vertexAttributeDescriptionCount * sizeof(VkVertexInputAttributeDescription);
VertexInputState.pVertexAttributeDescriptions = (VkVertexInputAttributeDescription*)&PSO[MemoryOffset];
MemoryOffset += Length;
}
CreateInfo.pVertexInputState = &VertexInputState;
}
VkPipelineInputAssemblyStateCreateInfo InputAssemblyCreateInfo;
if (PipelineCreateInfo.bHasVkPipelineInputAssemblyStateCreateInfo)
{
COPY_FROM_BUFFER(&InputAssemblyCreateInfo, PSO, MemoryOffset, sizeof(VkPipelineInputAssemblyStateCreateInfo));
CreateInfo.pInputAssemblyState = &InputAssemblyCreateInfo;
}
VkPipelineTessellationStateCreateInfo TesselationCreateInfo;
if (PipelineCreateInfo.bHasVkPipelineTessellationStateCreateInfo)
{
COPY_FROM_BUFFER(&TesselationCreateInfo, PSO, MemoryOffset, sizeof(VkPipelineTessellationStateCreateInfo));
CreateInfo.pTessellationState = &TesselationCreateInfo;
}
VkPipelineViewportStateCreateInfo ViewportState;
if (PipelineCreateInfo.bHasVkPipelineViewportStateCreateInfo)
{
COPY_FROM_BUFFER(&ViewportState, PSO, MemoryOffset, sizeof(VkPipelineViewportStateCreateInfo));
uint32_t ViewportCount;
COPY_FROM_BUFFER(&ViewportCount, PSO, MemoryOffset, sizeof(uint32_t));
if (ViewportCount > 0)
{
ViewportState.pViewports = (VkViewport*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkViewport) * ViewportCount;
}
uint32_t ScissorCount;
COPY_FROM_BUFFER(&ScissorCount, PSO, MemoryOffset, sizeof(uint32_t));
if (ScissorCount > 0)
{
ViewportState.pScissors = (VkRect2D*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkRect2D) * ScissorCount;
}
CreateInfo.pViewportState = &ViewportState;
}
if (PipelineCreateInfo.bHasVkPipelineRasterizationStateCreateInfo)
{
CreateInfo.pRasterizationState = (VkPipelineRasterizationStateCreateInfo*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkPipelineRasterizationStateCreateInfo);
}
if (PipelineCreateInfo.bHasVkPipelineMultisampleStateCreateInfo)
{
CreateInfo.pMultisampleState = (VkPipelineMultisampleStateCreateInfo*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkPipelineMultisampleStateCreateInfo);
}
if (PipelineCreateInfo.bHasVkPipelineDepthStencilStateCreateInfo)
{
CreateInfo.pDepthStencilState = (VkPipelineDepthStencilStateCreateInfo*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkPipelineDepthStencilStateCreateInfo);
}
VkPipelineColorBlendStateCreateInfo ColorBlendState;
if (PipelineCreateInfo.bHasVkPipelineColorBlendStateCreateInfo)
{
COPY_FROM_BUFFER(&ColorBlendState, PSO, MemoryOffset, sizeof(VkPipelineColorBlendStateCreateInfo));
if (ColorBlendState.attachmentCount > 0)
{
ColorBlendState.pAttachments = (VkPipelineColorBlendAttachmentState*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkPipelineColorBlendAttachmentState) * ColorBlendState.attachmentCount;
}
CreateInfo.pColorBlendState = &ColorBlendState;
}
VkPipelineDynamicStateCreateInfo DynamicState;
if (PipelineCreateInfo.bHasVkPipelineDynamicStateCreateInfo)
{
COPY_FROM_BUFFER(&DynamicState, PSO, MemoryOffset, sizeof(VkPipelineDynamicStateCreateInfo));
if (DynamicState.dynamicStateCount > 0)
{
DynamicState.pDynamicStates = (VkDynamicState*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkDynamicState) * DynamicState.dynamicStateCount;
}
CreateInfo.pDynamicState = &DynamicState;
}
VkPipelineLayoutCreateInfo PipelineLayoutCreateInfo;
COPY_FROM_BUFFER(&PipelineLayoutCreateInfo, PSO, MemoryOffset, sizeof(VkPipelineLayoutCreateInfo));
VkDescriptorSetLayoutCreateInfo* DescriptorSetLayoutInfos = nullptr;
VkDescriptorSetLayout* DescriptorSetLayouts = nullptr;
if (PipelineLayoutCreateInfo.setLayoutCount > 0)
{
DescriptorSetLayoutInfos = new VkDescriptorSetLayoutCreateInfo[PipelineLayoutCreateInfo.setLayoutCount];
DescriptorSetLayouts = new VkDescriptorSetLayout[PipelineLayoutCreateInfo.setLayoutCount];
for (uint32_t Idx = 0; Idx < PipelineLayoutCreateInfo.setLayoutCount; ++Idx)
{
uint32_t SetBindingsCount;
COPY_FROM_BUFFER(&SetBindingsCount, PSO, MemoryOffset, sizeof(uint32_t));
DescriptorSetLayoutInfos[Idx].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
DescriptorSetLayoutInfos[Idx].pNext = nullptr;
DescriptorSetLayoutInfos[Idx].flags = 0;
DescriptorSetLayoutInfos[Idx].bindingCount = SetBindingsCount;
DescriptorSetLayoutInfos[Idx].pBindings = (VkDescriptorSetLayoutBinding*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkDescriptorSetLayoutBinding) * SetBindingsCount;
vkCreateDescriptorSetLayout(Device, &DescriptorSetLayoutInfos[Idx], nullptr, &DescriptorSetLayouts[Idx]);
}
PipelineLayoutCreateInfo.pSetLayouts = DescriptorSetLayouts;
}
VkPipelineLayout PipelineLayout;
Result = vkCreatePipelineLayout(Device, &PipelineLayoutCreateInfo, nullptr, &PipelineLayout);
if (Result != VK_SUCCESS)
{
LOG_ERROR( " vkCreatePipelineLayout Failed %d ", Result);
exit(-1);
}
CreateInfo.layout = PipelineLayout;
VkRenderPass RenderPass;
bool bUseRenderPass2;
COPY_FROM_BUFFER(&bUseRenderPass2, PSO, MemoryOffset, sizeof(bool));
if (bUseRenderPass2)
{
// Render pass
VkRenderPassCreateInfo2KHR RenderPassCreateInfo;
COPY_FROM_BUFFER(&RenderPassCreateInfo, PSO, MemoryOffset, sizeof(VkRenderPassCreateInfo2KHR));
// Check for VK_STRUCTURE_TYPE_RENDER_PASS_FRAGMENT_DENSITY_MAP_CREATE_INFO_EXT
bool bHasCreateInfoNext = false;
COPY_FROM_BUFFER(&bHasCreateInfoNext, PSO, MemoryOffset, sizeof(bool));
if (bHasCreateInfoNext)
{
RenderPassCreateInfo.pNext = &PSO[MemoryOffset];
MemoryOffset += sizeof(VkRenderPassFragmentDensityMapCreateInfoEXT);
}
if (RenderPassCreateInfo.attachmentCount > 0)
{
RenderPassCreateInfo.pAttachments = (VkAttachmentDescription2KHR*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkAttachmentDescription2KHR) * RenderPassCreateInfo.attachmentCount;
}
if (RenderPassCreateInfo.dependencyCount > 0)
{
RenderPassCreateInfo.pDependencies = (VkSubpassDependency2KHR*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkSubpassDependency2KHR) * RenderPassCreateInfo.dependencyCount;
}
VkSubpassDescription2KHR* SubpassDescriptions = new VkSubpassDescription2KHR[RenderPassCreateInfo.subpassCount];
std::vector<VkFragmentShadingRateAttachmentInfoKHR> FSRAttachmentInfos;
std::vector<VkAttachmentReference2KHR> DepthStencilAttachments;
FSRAttachmentInfos.resize(RenderPassCreateInfo.subpassCount);
DepthStencilAttachments.resize(RenderPassCreateInfo.subpassCount);
for (uint32_t Idx = 0; Idx < RenderPassCreateInfo.subpassCount; ++Idx)
{
COPY_FROM_BUFFER(&SubpassDescriptions[Idx], PSO, MemoryOffset, sizeof(VkSubpassDescription2KHR));
// Add additional pNext structs
// FSR
bool bHasFSRAttachmentInfo = false;
COPY_FROM_BUFFER(&bHasFSRAttachmentInfo, PSO, MemoryOffset, sizeof(bool));;
if (bHasFSRAttachmentInfo)
{
FSRAttachmentInfos[Idx] = VkFragmentShadingRateAttachmentInfoKHR();
auto& FSRAttachmentInfo = FSRAttachmentInfos[Idx];
FSRAttachmentInfo.pNext = nullptr;
FSRAttachmentInfo.sType = VK_STRUCTURE_TYPE_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR;
FSRAttachmentInfo.pFragmentShadingRateAttachment = (VkAttachmentReference2KHR*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkAttachmentReference2KHR);
FSRAttachmentInfo.shadingRateAttachmentTexelSize = *(VkExtent2D*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkExtent2D);
SubpassDescriptions[Idx].pNext = &FSRAttachmentInfo;
}
if (SubpassDescriptions[Idx].colorAttachmentCount > 0)
{
SubpassDescriptions[Idx].pColorAttachments = (VkAttachmentReference2KHR*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkAttachmentReference2KHR) * SubpassDescriptions[Idx].colorAttachmentCount;
}
if (SubpassDescriptions[Idx].inputAttachmentCount > 0)
{
SubpassDescriptions[Idx].pInputAttachments = (VkAttachmentReference2KHR*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkAttachmentReference2KHR) * SubpassDescriptions[Idx].inputAttachmentCount;
}
bool bHasResolveAttachment;
COPY_FROM_BUFFER(&bHasResolveAttachment, PSO, MemoryOffset, sizeof(bool));
if (bHasResolveAttachment)
{
if (SubpassDescriptions[Idx].colorAttachmentCount > 0)
{
SubpassDescriptions[Idx].pResolveAttachments = (VkAttachmentReference2KHR*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkAttachmentReference2KHR) * SubpassDescriptions[Idx].colorAttachmentCount;
}
}
bool bHasDepthStencilAttachment;
COPY_FROM_BUFFER(&bHasDepthStencilAttachment, PSO, MemoryOffset, sizeof(bool));
if (bHasDepthStencilAttachment)
{
bool bHasStencilLayout;
COPY_FROM_BUFFER(&bHasStencilLayout, PSO, MemoryOffset, sizeof(bool));
void* pDepthStencilAttachmentPNext = nullptr;
if(bHasStencilLayout)
{
pDepthStencilAttachmentPNext = (void*) & PSO[MemoryOffset];
MemoryOffset += sizeof(VkAttachmentReferenceStencilLayout);
}
DepthStencilAttachments[Idx] = *(VkAttachmentReference2KHR*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkAttachmentReference2KHR);
auto& DepthStencilAttachment = DepthStencilAttachments.back();
DepthStencilAttachments[Idx].pNext = pDepthStencilAttachmentPNext;
SubpassDescriptions[Idx].pDepthStencilAttachment = &DepthStencilAttachments[Idx];
}
}
RenderPassCreateInfo.pSubpasses = SubpassDescriptions;
if (RenderPassCreateInfo.correlatedViewMaskCount > 0)
{
RenderPassCreateInfo.pCorrelatedViewMasks = (uint32_t*)&PSO[MemoryOffset];
MemoryOffset += sizeof(uint32_t) * RenderPassCreateInfo.correlatedViewMaskCount;
}
Result = vkCreateRenderPass2(Device, &RenderPassCreateInfo, nullptr, &RenderPass);
if (Result != VK_SUCCESS)
{
LOG_ERROR( " vkCreateRenderPass2 Failed %d ", Result);
exit(-1);
}
delete[] SubpassDescriptions;
}
else
{
// Render pass
VkRenderPassCreateInfo RenderPassCreateInfo;
COPY_FROM_BUFFER(&RenderPassCreateInfo, PSO, MemoryOffset, sizeof(VkRenderPassCreateInfo));
// Check for VK_STRUCTURE_TYPE_RENDER_PASS_FRAGMENT_DENSITY_MAP_CREATE_INFO_EXT
bool bHasCreateInfoNext = false;
COPY_FROM_BUFFER(&bHasCreateInfoNext, PSO, MemoryOffset, sizeof(bool));
if (bHasCreateInfoNext)
{
RenderPassCreateInfo.pNext = &PSO[MemoryOffset];
MemoryOffset += sizeof(VkRenderPassFragmentDensityMapCreateInfoEXT);
}
if (RenderPassCreateInfo.attachmentCount > 0)
{
RenderPassCreateInfo.pAttachments = (VkAttachmentDescription*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkAttachmentDescription) * RenderPassCreateInfo.attachmentCount;
}
if (RenderPassCreateInfo.dependencyCount > 0)
{
RenderPassCreateInfo.pDependencies = (VkSubpassDependency*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkSubpassDependency) * RenderPassCreateInfo.dependencyCount;
}
VkSubpassDescription* SubpassDescriptions = new VkSubpassDescription[RenderPassCreateInfo.subpassCount];
for (uint32_t Idx = 0; Idx < RenderPassCreateInfo.subpassCount; ++Idx)
{
COPY_FROM_BUFFER(&SubpassDescriptions[Idx], PSO, MemoryOffset, sizeof(VkSubpassDescription));
if (SubpassDescriptions[Idx].colorAttachmentCount > 0)
{
SubpassDescriptions[Idx].pColorAttachments = (VkAttachmentReference*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkAttachmentReference) * SubpassDescriptions[Idx].colorAttachmentCount;
}
if (SubpassDescriptions[Idx].inputAttachmentCount > 0)
{
SubpassDescriptions[Idx].pInputAttachments = (VkAttachmentReference*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkAttachmentReference) * SubpassDescriptions[Idx].inputAttachmentCount;
}
bool bHasResolveAttachment;
COPY_FROM_BUFFER(&bHasResolveAttachment, PSO, MemoryOffset, sizeof(bool));
if (bHasResolveAttachment)
{
if (SubpassDescriptions[Idx].colorAttachmentCount > 0)
{
SubpassDescriptions[Idx].pResolveAttachments = (VkAttachmentReference*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkAttachmentReference) * SubpassDescriptions[Idx].colorAttachmentCount;
}
}
bool bHasDepthStencilAttachment;
COPY_FROM_BUFFER(&bHasDepthStencilAttachment, PSO, MemoryOffset, sizeof(bool));
if (bHasDepthStencilAttachment)
{
SubpassDescriptions[Idx].pDepthStencilAttachment = (VkAttachmentReference*)&PSO[MemoryOffset];
MemoryOffset += sizeof(VkAttachmentReference);
}
}
RenderPassCreateInfo.pSubpasses = SubpassDescriptions;
Result = vkCreateRenderPass(Device, &RenderPassCreateInfo, nullptr, &RenderPass);
if (Result != VK_SUCCESS)
{
LOG_ERROR( " vkCreateRenderPass2 Failed %d ", Result);
exit(-1);
}
delete[] SubpassDescriptions;
}
CreateInfo.renderPass = RenderPass;
VkPipeline Pipeline;
VkShaderModule VSModule;
VkShaderModule PSModule;
{
VkShaderModuleCreateInfo ModuleCreateInfo;
ModuleCreateInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
ModuleCreateInfo.pCode = (const uint32_t *)VS;
ModuleCreateInfo.codeSize = VSSize;
ModuleCreateInfo.flags = 0;
ModuleCreateInfo.pNext = nullptr;
Result = vkCreateShaderModule(Device, &ModuleCreateInfo, nullptr, &VSModule);
if (Result != VK_SUCCESS)
{
LOG_ERROR( " vkCreateShaderModule VS Failed %d ", Result);
exit(-1);
}
ShaderStages[0].module = VSModule;
}
{
VkShaderModuleCreateInfo ModuleCreateInfo;
ModuleCreateInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
ModuleCreateInfo.pCode = (const uint32_t*)PS;
ModuleCreateInfo.codeSize = PSSize;
ModuleCreateInfo.flags = 0;
ModuleCreateInfo.pNext = nullptr;
Result = vkCreateShaderModule(Device, &ModuleCreateInfo, nullptr, &PSModule);
if (Result != VK_SUCCESS)
{
LOG_ERROR( " vkCreateShaderModule PS Failed %d ", Result);
exit(-1);
}
ShaderStages[1].module = PSModule;
}
if (PipelineCache == VK_NULL_HANDLE)
{
VkPipelineCacheCreateInfo PipelineCacheCreateInfo;
memset(&PipelineCacheCreateInfo, 0, sizeof(VkPipelineCacheCreateInfo));
PipelineCacheCreateInfo.flags = 0;
PipelineCacheCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
PipelineCacheCreateInfo.pInitialData = PSOCacheDataSource;
PipelineCacheCreateInfo.initialDataSize = PSOCacheDataSourceSize;
Result = vkCreatePipelineCache(Device, &PipelineCacheCreateInfo, nullptr, &PipelineCache);
if (Result != VK_SUCCESS)
{
LOG_ERROR( " vkCreatePipelineCache Failed %d ", Result);
exit(-1);
}
}
Result = vkCreateGraphicsPipelines(Device, PipelineCache, 1, &CreateInfo, nullptr, &Pipeline);
if (Result != VK_SUCCESS)
{
LOG_ERROR( " vkCreateGraphicsPipelines Failed %d ", Result);
exit(-1);
}
for (uint32_t Idx = 0; Idx < PipelineLayoutCreateInfo.setLayoutCount; ++Idx)
{
vkDestroyDescriptorSetLayout(Device, DescriptorSetLayouts[Idx], nullptr);
}
vkDestroyShaderModule(Device, VSModule, nullptr);
vkDestroyShaderModule(Device, PSModule, nullptr);
vkDestroyRenderPass(Device, RenderPass, nullptr);
vkDestroyPipelineLayout(Device, PipelineLayout, nullptr);
vkDestroyPipeline(Device, Pipeline, nullptr);
if (DescriptorSetLayoutInfos)
{
delete[] DescriptorSetLayoutInfos;
}
if (DescriptorSetLayouts)
{
delete[] DescriptorSetLayouts;
}
END_TRACE();
return errorLog;
}
// If BinaryData == nullptr then SizeINOUT is set.
// If BinaryData != nullptr it is filled with the output, SizeINOUT specifies the size of BinaryData.
void GetPSOBinary(char* BinaryData, uint32_t& SizeINOUT)
{
size_t Size = 0;
if(BinaryData == nullptr)
{
BEGIN_TRACE("GetPSOBinarySize");
VkResult Result = vkGetPipelineCacheData(Device, PipelineCache, &Size, nullptr);
if (Result != VK_SUCCESS)
{
LOG_ERROR( " vkGetPipelineCacheData 1 Failed %d ", Result);
exit(-1);
}
SizeINOUT = (uint32_t)Size;
}
else
{
BEGIN_TRACE("GetPSOBinaryData");
Size = SizeINOUT;
VkResult Result = vkGetPipelineCacheData(Device, PipelineCache, &Size, BinaryData);
if (Result != VK_SUCCESS)
{
LOG_ERROR( " vkGetPipelineCacheData 2 Failed %d (%d,%zu)", Result, SizeINOUT, Size);
exit(-1);
}
SizeINOUT = (uint32_t)Size;
DestroyPipelineCache();
}
END_TRACE();
}
};
// Applies DesiredAffinitySet to the thread identified by ThreadId via sched_setaffinity and
// logs an error if the call fails. Debug builds also read the affinity back and log it.
// Only the first int-sized chunk of each CPU set is printed in the log messages.
static void SetAffinity(pid_t ThreadId, const cpu_set_t& DesiredAffinitySet)
{
    const int SetResult = sched_setaffinity(ThreadId, sizeof(DesiredAffinitySet), &DesiredAffinitySet);
    if (SetResult != 0)
    {
        LOG_ERROR("set affinity %d, %d, %x, errno %d", SetResult, ThreadId, *((int*)&DesiredAffinitySet), errno);
    }
#ifndef NDEBUG
    cpu_set_t AppliedAffinitySet;
    CPU_ZERO(&AppliedAffinitySet);
    const int GetResult = sched_getaffinity(ThreadId, sizeof(AppliedAffinitySet), &AppliedAffinitySet);
    LOG_VERBOSE("affinity Info: tid %d, desired %x, set %x, rescode %d, errno %d", ThreadId, *((int*)&DesiredAffinitySet), *((int*)&AppliedAffinitySet), GetResult, errno);
#endif
}
// Applies DesiredAffinity to every thread listed under /proc/self/task.
// this is required as some drivers have additional threads which need the same treatment.
// we dont know what they are so all threads get hit, any new threads inherit the current settings.
// If the task directory cannot be opened, only thread id 0 (the caller) is updated.
static void SetAffinityAllThreads(const cpu_set_t& DesiredAffinity)
{
    static const char ThreadDir[] = "/proc/self/task";

    DIR* TaskDirectory = opendir(ThreadDir);
    if (TaskDirectory == NULL)
    {
        LOG_ERROR("set affinity failed to find thread dir %s", ThreadDir);
        SetAffinity(0, DesiredAffinity);
        return;
    }

    for (struct dirent* TaskEntry = readdir(TaskDirectory); TaskEntry != NULL; TaskEntry = readdir(TaskDirectory))
    {
        // Non-numeric entries such as "." and ".." parse to 0 and are skipped.
        const pid_t TaskId = strtol(TaskEntry->d_name, nullptr, 10);
        if (TaskId != 0)
        {
            SetAffinity(TaskId, DesiredAffinity);
        }
    }
    closedir(TaskDirectory);
}
// JNI entry point: applies scheduler policy, nice value and/or CPU affinity as described by
// the packed 64-bit PriInfoIn word.
//
// Layout decoded below (NOTE(review): must match the packer on the sending side - confirm):
//   bit 0       apply scheduler policy/priority
//   bit 1       apply nice value
//   bit 2       apply CPU affinity
//   bits 8-15   scheduler policy
//   bits 16-23  scheduler priority, stored with a +128 bias
//   bits 24-31  nice value, stored with a +128 bias
//   bits 32-63  affinity mask, one bit per CPU (0xFFFFFFFF means "all CPUs")
JNI_METHOD void Java_com_epicgames_unreal_psoservices_PSOProgramService_NativeSetThreadPriority(JNIEnv* jenv, jobject thiz, jlong PriInfoIn)
{
    struct PrecompilePriInfo
    {
        PrecompilePriInfo(uint64_t InfoIn) : PriInfo(InfoIn) {}
        bool ShouldSetSchedPolicy() const { return (PriInfo & (1ull << 0)) != 0; }
        bool ShouldSetNice() const { return (PriInfo & (1ull << 1)) != 0; }
        bool ShouldSetAffinity() const { return (PriInfo & (1ull << 2)) != 0; }
        // Fields are extracted with right shifts (a left shift followed by & 0xff is always 0).
        // The getters return int so the biased values stay negative where plain char is unsigned.
        int GetSchedPolicy() const { return static_cast<int>((PriInfo >> 8) & 0xff); }
        int GetSchedPolicyPri() const { return static_cast<int>((PriInfo >> 16) & 0xff) - 128; }
        int GetNice() const { return static_cast<int>((PriInfo >> 24) & 0xff) - 128; }
        uint32_t GetAffinity() const { return static_cast<uint32_t>((PriInfo >> 32) & 0xFFFFFFFF); }
        uint64_t PriInfo = 0;
    };
    PrecompilePriInfo PriInfo(static_cast<uint64_t>(PriInfoIn));

    if (PriInfo.ShouldSetSchedPolicy())
    {
        int InitialPolicy = 0;
        int NewPolicy = PriInfo.GetSchedPolicy();
        int SchedPri = PriInfo.GetSchedPolicyPri();
        struct sched_param Sched = { };
        pthread_t InThread = pthread_self();
        int getres = pthread_getschedparam(InThread, &InitialPolicy, &Sched);
        // Clamp the requested priority to the range the new policy allows.
        int primax = sched_get_priority_max(NewPolicy);
        int primin = sched_get_priority_min(NewPolicy);
        Sched.sched_priority = SchedPri < primin ? primin : (SchedPri > primax ? primax : SchedPri);
        LOG_VERBOSE("tinfo initial policy %d, desired %d, getres %d, errno %d, pridesired %d, primin %d primax %d", InitialPolicy, NewPolicy, getres, errno, Sched.sched_priority, primin, primax);
        (void)getres; // only consumed by the verbose log above
        int rescode = sched_setscheduler(0, NewPolicy, &Sched);
        if (rescode)
        {
            LOG_ERROR("setsched error %d, errno %d", rescode, errno);
        }
    }

    if (PriInfo.ShouldSetNice())
    {
        int Nice = PriInfo.GetNice();
        int InitialNice = getpriority(PRIO_PROCESS, 0);
        int rescode = setpriority(PRIO_PROCESS, 0, Nice);
        int resultNice = getpriority(PRIO_PROCESS, 0);
        if (rescode)
        {
            LOG_ERROR("setpriority failed. initial nice %d, desired %d, res %d, errno %d, result %d ", InitialNice, Nice, rescode, errno, resultNice);
        }
    }

    if (PriInfo.ShouldSetAffinity())
    {
        const uint32_t AffinityMask = PriInfo.GetAffinity();
        cpu_set_t DesiredAffinitySet;
        CPU_ZERO(&DesiredAffinitySet);
        if (AffinityMask == 0xFFFFFFFF)
        {
            // "All CPUs": set every bit of the set, including CPUs beyond index 31.
            memset(&DesiredAffinitySet, 0xff, sizeof(DesiredAffinitySet));
        }
        else
        {
            for (int i = 0; i < 32; i++)
            {
                // Unsigned literal: 1 << 31 on a signed int would overflow.
                if (AffinityMask & (1u << i))
                {
                    CPU_SET(i, &DesiredAffinitySet);
                }
            }
        }
        SetAffinityAllThreads(DesiredAffinitySet);
    }
}
// JNI entry point that currently performs no work; FVulkanPSOCompiler::CompileGFXPSO calls
// InitDevice itself before compiling.
JNI_METHOD void Java_com_epicgames_unreal_psoservices_PSOProgramService_InitVKDevice(JNIEnv* jenv, jobject thiz)
{
    (void)jenv;
    (void)thiz;
}
// JNI entry point: tears down the Vulkan objects owned by the shared FVulkanPSOCompiler.
JNI_METHOD void Java_com_epicgames_unreal_psoservices_PSOProgramService_ShutdownVKDevice(JNIEnv* jenv, jobject thiz)
{
    FVulkanPSOCompiler& Compiler = FVulkanPSOCompiler::Get();
    Compiler.ShutDownDevice();
}
// Test hook: when GExitAfterJobCount is non-zero the process exits once that many jobs have
// been counted. With the value 0 the hook is disabled and GJobCount is never advanced.
static const int GExitAfterJobCount = 0;
int GJobCount = 0;

// Called at the start of each compile job; see GExitAfterJobCount.
void ExitTest()
{
    if (GExitAfterJobCount == 0)
    {
        return;
    }

    if (GJobCount == GExitAfterJobCount)
    {
        LOG_ERROR( " exit test! ");
        exit(-1);
    }
    ++GJobCount;
}
// JNI entry point: compiles one graphics PSO from Java byte arrays and returns the resulting
// pipeline cache data as a new byte array.
//
// jVS / jPS               - vertex and pixel shader code.
// jPSO                    - serialized pipeline description (see FVulkanPSOCompiler::CompileGFXPSO).
// jPSOCacheDataSource     - initial pipeline cache data.
// jCompilationDuration    - optional float array; element 0 receives the elapsed seconds.
//
// Returns the cache data, or nullptr if an input array could not be accessed or the result
// array could not be allocated (a Java exception is then expected to be pending).
JNI_METHOD jobject Java_com_epicgames_unreal_psoservices_PSOProgramService_CompileVKGFXPSO(JNIEnv* jenv, jobject thiz, jbyteArray jVS, jbyteArray jPS, jbyteArray jPSO, jbyteArray jPSOCacheDataSource, jfloatArray jCompilationDuration)
{
    // Scoped access to a Java byte array: the elements are always handed back to the VM,
    // which the previous code never did (one leaked buffer per array per job).
    struct FScopedByteArray
    {
        FScopedByteArray(JNIEnv* InEnv, jbyteArray InArray)
            : Env(InEnv)
            , Array(InArray)
            , Elements(InEnv->GetByteArrayElements(InArray, nullptr))
            , Length(static_cast<uint64_t>(InEnv->GetArrayLength(InArray)))
        {
        }
        ~FScopedByteArray()
        {
            if (Elements != nullptr)
            {
                // JNI_ABORT: native code only reads these buffers, so nothing is copied back.
                Env->ReleaseByteArrayElements(Array, Elements, JNI_ABORT);
            }
        }
        FScopedByteArray(const FScopedByteArray&) = delete;
        FScopedByteArray& operator=(const FScopedByteArray&) = delete;

        const uint8_t* Data() const { return reinterpret_cast<const uint8_t*>(Elements); }
        // A non-empty array without an element pointer means the VM could not provide the data.
        bool IsUsable() const { return Elements != nullptr || Length == 0; }

        JNIEnv* Env;
        jbyteArray Array;
        jbyte* Elements;
        uint64_t Length;
    };

    ExitTest();
    double CompilationStartTime = now_s();

    {
        FScopedByteArray VS(jenv, jVS);
        FScopedByteArray PS(jenv, jPS);
        FScopedByteArray PSO(jenv, jPSO);
        FScopedByteArray PSOCacheDataSource(jenv, jPSOCacheDataSource);
        if (!VS.IsUsable() || !PS.IsUsable() || !PSO.IsUsable() || !PSOCacheDataSource.IsUsable())
        {
            LOG_ERROR( " CompileVKGFXPSO failed to access input arrays ");
            return nullptr;
        }
        FVulkanPSOCompiler::Get().CompileGFXPSO(VS.Data(), VS.Length, PS.Data(), PS.Length, PSO.Data(), PSO.Length, PSOCacheDataSource.Data(), PSOCacheDataSource.Length);
        // Inputs are released here; the compiler keeps no pointers into them after returning.
    }

    // First call queries the size, second call fetches the data (and destroys the cache).
    uint32_t Size = 0;
    FVulkanPSOCompiler::Get().GetPSOBinary(nullptr, Size);
    jbyteArray Data = jenv->NewByteArray(Size);
    if (Data == nullptr)
    {
        LOG_ERROR( " CompileVKGFXPSO failed to allocate %u result bytes ", Size);
        return nullptr;
    }
    if (Size > 0)
    {
        std::vector<char> BinaryData(Size);
        FVulkanPSOCompiler::Get().GetPSOBinary(BinaryData.data(), Size);
        jenv->SetByteArrayRegion(Data, 0, Size, reinterpret_cast<const jbyte*>(BinaryData.data()));
    }

    double CompilationDuration = now_s() - CompilationStartTime;
    if (jCompilationDuration != nullptr && jenv->GetArrayLength(jCompilationDuration) > 0)
    {
        float* CDA = jenv->GetFloatArrayElements(jCompilationDuration, nullptr);
        if (CDA != nullptr)
        {
            CDA[0] = (float)CompilationDuration;
            // Mode 0 copies the value back (if needed) and frees the buffer.
            jenv->ReleaseFloatArrayElements(jCompilationDuration, CDA, 0);
        }
    }
    return Data;
}
// Shared-memory variant of CompileVKGFXPSO.
//
// Input:  SHMemFD is mapped read-only and is read as four blobs laid out back to back:
//         VS (jVSSize bytes), PS (jPSSize bytes), PSO (jPSOSize bytes) and the PSO cache
//         source data (jPSOCacheDataSourceSize bytes).
// Output: a newly created shared memory region containing a uint32_t byte count followed by
//         the PSO binary. Its file descriptor is returned; when the region cannot be created
//         the failing value from ASharedMemory_create is returned instead.
// jCompilationDuration[0] receives the wall-clock duration of this call in seconds.
//
// Failing to map either region, or being given sizes that do not fit inside the input
// region, is treated as fatal: the error is logged and the process exits.
JNI_METHOD jint Java_com_epicgames_unreal_psoservices_PSOProgramService_CompileVKGFXPSOSHM(JNIEnv* jenv, jobject thiz, jint SHMemFD, jlong jVSSize, jlong jPSSize, jlong jPSOSize, jlong jPSOCacheDataSourceSize, jfloatArray jCompilationDuration)
{
	ExitTest();
	const double CompilationStartTime = now_s();
	{
		BEGIN_TRACE("CompileVKGFXPSOSHM");
		BEGIN_TRACE("CompileVKGFXPSOSHM_1");
		LOG_VERBOSE("SHMemFD %d ", SHMemFD);

		const size_t memSize = ASharedMemory_getSize(SHMemFD);
		void* const ParamsMapping = mmap(nullptr, memSize, PROT_READ, MAP_SHARED, SHMemFD, 0);
		// mmap signals failure with MAP_FAILED, not with a null pointer.
		if (ParamsMapping == MAP_FAILED)
		{
			LOG_ERROR( "failed to map %zu input bytes (%d, %d)", memSize, SHMemFD, errno);
			exit(-1);
		}
		LOG_VERBOSE("ParamsSharedBuffer %zu, %p ", memSize, ParamsMapping);

		const uint64_t VSSize = static_cast<uint64_t>(jVSSize);
		const uint64_t PSSize = static_cast<uint64_t>(jPSSize);
		const uint64_t PSOSize = static_cast<uint64_t>(jPSOSize);
		const uint64_t PSOCacheDataSourceSize = static_cast<uint64_t>(jPSOCacheDataSourceSize);

		// The sizes come from the caller; make sure the four blobs fit in the mapping before
		// forming pointers into it. Subtracting from the remaining byte count cannot overflow,
		// and a negative jlong shows up here as a huge unsigned value and is rejected as well.
		const uint64_t BlobSizes[] = { VSSize, PSSize, PSOSize, PSOCacheDataSourceSize };
		uint64_t RemainingBytes = memSize;
		for (const uint64_t BlobSize : BlobSizes)
		{
			if (BlobSize > RemainingBytes)
			{
				LOG_ERROR("input sizes (%llu, %llu, %llu, %llu) do not fit in %zu mapped bytes",
					static_cast<unsigned long long>(VSSize),
					static_cast<unsigned long long>(PSSize),
					static_cast<unsigned long long>(PSOSize),
					static_cast<unsigned long long>(PSOCacheDataSourceSize),
					memSize);
				munmap(ParamsMapping, memSize);
				exit(-1);
			}
			RemainingBytes -= BlobSize;
		}

		// Offsets are bounded by memSize (checked above), so the pointer arithmetic below
		// cannot wrap the way the previous 32-bit running offset could.
		const uint8_t* VS = static_cast<const uint8_t*>(ParamsMapping);
		LOG_VERBOSE("vs %llu", static_cast<unsigned long long>(VSSize));
		const uint8_t* PS = VS + VSSize;
		LOG_VERBOSE("ps %llu", static_cast<unsigned long long>(PSSize));
		const uint8_t* PSO = PS + PSSize;
		LOG_VERBOSE("PSO %llu", static_cast<unsigned long long>(PSOSize));
		const uint8_t* PSOCacheDataSource = PSO + PSOSize;
		LOG_VERBOSE("PSOCacheDataSourceSize %llu", static_cast<unsigned long long>(PSOCacheDataSourceSize));
		END_TRACE();

		FVulkanPSOCompiler::Get().CompileGFXPSO(VS, VSSize, PS, PSSize, PSO, PSOSize, PSOCacheDataSource, PSOCacheDataSourceSize);
		munmap(ParamsMapping, memSize);
		END_TRACE();
	}

	BEGIN_TRACE("CompileVKGFXPSOSHM_GB");
	// First call only queries the size of the binary.
	uint32_t Size = 0;
	FVulkanPSOCompiler::Get().GetPSOBinary(nullptr, Size);

	// The output region holds the size header plus the binary, rounded up to whole pages.
	static const uint32_t PageSize = sysconf(_SC_PAGESIZE);
	const uint32_t AllocSize = Size + sizeof(Size);
	const uint32_t AlignedSize = (AllocSize + PageSize - 1) & ~(PageSize - 1);

	const int SharedMemOutputFD = ASharedMemory_create("", AlignedSize);
	if (SharedMemOutputFD != -1)
	{
		BEGIN_TRACE("CompileVKGFXPSOSHM_GB_1");
		void* const OutputMapping = mmap(nullptr, AlignedSize, PROT_READ | PROT_WRITE, MAP_SHARED, SharedMemOutputFD, 0);
		END_TRACE();
		// Same as above: failure is MAP_FAILED, never nullptr.
		if (OutputMapping == MAP_FAILED)
		{
			LOG_ERROR( "out map failed (%d), shm %d, size %d, alloc %d", errno, SharedMemOutputFD, Size, AlignedSize);
			exit(-1);
		}

		char* OutputSharedBuffer = static_cast<char*>(OutputMapping);
		memcpy(OutputSharedBuffer, &Size, sizeof(Size));
		FVulkanPSOCompiler::Get().GetPSOBinary(OutputSharedBuffer + sizeof(Size), Size);

		BEGIN_TRACE("CompileVKGFXPSOSHM_GB_3");
		// limit access to read only
		ASharedMemory_setProt(SharedMemOutputFD, PROT_READ);
		LOG_VERBOSE("success, shm %d, size %d, alloc %d", SharedMemOutputFD, Size, AlignedSize);
		munmap(OutputMapping, AlignedSize);
		END_TRACE();
	}
	else
	{
		LOG_ERROR( "Mem alloc %d bytes failed (errno %d) ", AllocSize, errno);
	}

	const double CompilationDuration = now_s() - CompilationStartTime;
	float* CDA = jenv->GetFloatArrayElements(jCompilationDuration, nullptr);
	if (CDA != nullptr)
	{
		CDA[0] = (float)CompilationDuration;
		// Mode 0: copy the value back to the Java array and free the native buffer.
		jenv->ReleaseFloatArrayElements(jCompilationDuration, CDA, 0);
	}
	END_TRACE();
	return SharedMemOutputFD;
}