Files
UnrealEngine/Engine/Source/ThirdParty/NVIDIA/NVaftermath/include/GFSDK_Aftermath_GpuCrashDump.h
2025-05-18 13:04:45 +08:00

478 lines
22 KiB
C

/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA CORPORATION and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA CORPORATION is strictly prohibited.
*/
/*
* █████ █████ ██████ ████ ████ ███████ ████ ██████ ██ ██
* ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██
* ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██
* ██████ ████ ██ ████ █████ ██ ██ ██ ██████ ██ ███████
* ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██
* ██ ██ ██ ██ █████ ██ ██ ██ ██ ██ ██ ██ ██ ██ DEBUGGER
* ██ ██
* ████████████████████████████████████████████████████████ ██ █ ██ ████████████
*
*
* HOW TO USE AFTERMATH GPU CRASH DUMP COLLECTION:
* -----------------------------------------------
*
* 1) Call 'GFSDK_Aftermath_EnableGpuCrashDumps', to enable GPU crash dump collection.
* This must be done before any other library calls are made and before any D3D
* device is created by the application.
*
* With this call the application can register a callback function that is invoked
* with the GPU crash dump data once a TDR/hang occurs. In addition, it is also
* possible to provide optional callback functions for collecting shader debug
* information and for providing additional descriptive data from the application to
* include in the crash dump.
*
* Enabling GPU crash dumps will also override any settings from an also active
* Nsight Graphics GPU crash dump monitor for the calling process.
*
*
* 2) On DX11/DX12, call 'GFSDK_Aftermath_DXxx_Initialize', to initialize the library and
* to enable additional Aftermath features that will affect the data captured in
* the GPU crash dumps, such as Aftermath event markers, automatic call stack
* markers, resource tracking, shader debug information, or additional shader
* error reporting. See 'GFSDK_Aftermath.h' for more details.
*
* On Vulkan use the 'VK_NV_device_diagnostics_config' extension to enable
* additional Aftermath features, such as automatic call stack markers, resource
* tracking, shader debug information, or additional shader error reporting. See
* 'Readme.md' for more details.
*
*
* 4) Before the application shuts down, call 'GFSDK_Aftermath_DisableGpuCrashDumps' to
* disable GPU crash dump collection.
*
* Disabling GPU crash dumps will also re-establish any settings from an also active
* Nsight Graphics GPU crash dump monitor for the calling process.
*
*
* 5) If the application detects a potential GPU crash, i.e., device removed/lost,
* call 'GFSDK_Aftermath_GetCrashDumpStatus' to check the GPU crash dump status.
* The application should then wait until Aftermath has finished processing the
* crash dump before releasing the device or exiting.
*
* The recommended process for handling device removed/lost events is as follows:
*
* a) Call 'GFSDK_Aftermath_GetCrashDumpStatus' to check the GPU crash dump status.
*
* b) If the status is "Unknown", this means the graphics driver does not support
* the crash dump status query feature. But it is still possible to receive the
* "Finished" or "Failed" status. The application should continue to poll the
* status as described in step d) below.
*
* c) If the status is not "Unknown" and not "NotStarted", this means Aftermath
* has detected the GPU crash.
*
* d) The application should wait for a few seconds to allow the Aftermath
* graphics driver thread to collect the GPU crash dump data. Start polling the
* status until the crash dump data has been collected and the notification
* callback has been processed by the application, or a timeout of a couple of
* seconds has expired.
*
* e) If the timeout expires or the status returns "Finished" or "Failed" you
* should continue handling the device lost event as normal (e.g., release
* the device and/or terminate the application).
*
* Pseudo code implementing the above steps:
*
* if (deviceLost)
* {
* // Check Aftermath crash dump status
* GFSDK_Aftermath_CrashDump_Status status = GFSDK_Aftermath_CrashDump_Status_Unknown;
* AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_GetCrashDumpStatus(&status));
*
* // Loop while Aftermath crash dump data collection has not finished or
* // the application is still processing the crash dump data.
* while (status != GFSDK_Aftermath_CrashDump_Status_CollectingDataFailed &&
* status != GFSDK_Aftermath_CrashDump_Status_Finished &&
* !timeout)
* {
* // Wait for a couple of milliseconds, and poll the crash dump status again.
* Sleep(50);
* GFSDK_Aftermath_GetCrashDumpStatus(&status);
* }
*
* HandleDeviceLost();
* }
*
*
* OPTIONAL:
*
* o) (Optional) Instrument the application with event markers as described in
* 'GFSDK_Aftermath.h'.
*
* o) (Optional, DX12-Only) Register DX12 resource pointers with Aftermath as
* described in 'GFSDK_Aftermath.h'.
*
*
* PERFORMANCE TIPS:
*
* o) Enabling shader debug information creation will introduce shader compile
* time overhead as well as memory overhead for handling the debug information.
*
* o) User event markers cause considerable overhead and should be used very
* carefully.
*
* o) Automatic call stack markers for draw calls, compute and ray tracing
* dispatches, acceleration structure building, and copy operations provide a
* less intrusive alternative to manually injecting event markers for every
* command. However, they are in general even more expensive in terms of CPU
* overhead and should be avoided in shipping applications.
*
*/
#ifndef GFSDK_Aftermath_GpuCrashDump_H
#define GFSDK_Aftermath_GpuCrashDump_H
#include "GFSDK_Aftermath_Defines.h"
#pragma pack(push, 8)
#ifdef __cplusplus
extern "C" {
#endif
/////////////////////////////////////////////////////////////////////////
// GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags
// ---------------------------------
//
// Flags to configure for which graphics APIs to enable GPU crash dumps.
//
/////////////////////////////////////////////////////////////////////////
GFSDK_AFTERMATH_DECLARE_ENUM(GpuCrashDumpWatchedApiFlags){
// Default setting - GPU crash dump tracking disabled.
GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_None = 0x0,
// Enable GPU crash dump tracking for the DX API.
GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_DX = 0x1,
// Enable GPU crash dump tracking for the Vulkan API.
GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan = 0x2,
};
/////////////////////////////////////////////////////////////////////////
// GFSDK_Aftermath_GpuCrashDumpFeatureFlags
// ---------------------------------
//
// Flags to configure GPU crash dump-specific Aftermath features.
//
/////////////////////////////////////////////////////////////////////////
GFSDK_AFTERMATH_DECLARE_ENUM(GpuCrashDumpFeatureFlags){
// Default settings
GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default = 0x0,
// Defer shader debug information callbacks until an actual GPU crash dump
// is generated and execute shader debug information callbacks only for the
// shaders related to the crash dump.
//
// NOTE: Using this option will increase the memory footprint of the
// application.
GFSDK_Aftermath_GpuCrashDumpFeatureFlags_DeferDebugInfoCallbacks = 0x1,
};
/////////////////////////////////////////////////////////////////////////
// GFSDK_Aftermath_GpuCrashDumpDescriptionKey
// ---------------------------------
//
// Key definitions for user-defined GPU crash dump description.
//
/////////////////////////////////////////////////////////////////////////
GFSDK_AFTERMATH_DECLARE_ENUM(GpuCrashDumpDescriptionKey){
// Predefined key for application name.
GFSDK_Aftermath_GpuCrashDumpDescriptionKey_ApplicationName = 0x1,
// Predefined key for application version.
GFSDK_Aftermath_GpuCrashDumpDescriptionKey_ApplicationVersion = 0x2,
// Base key for creating user-defined key-value pairs. Any value greater or equal
// to 'GFSDK_Aftermath_GpuCrashDumpDescriptionKey_UserDefined' will create a
// user-defined key-value pair.
GFSDK_Aftermath_GpuCrashDumpDescriptionKey_UserDefined = 0x10000,
};
/////////////////////////////////////////////////////////////////////////
// GFSDK_Aftermath_CrashDump_Status
// ---------------------------------
//
// Aftermath GPU crash dump generation progress status.
//
// Applications are expected to check the status of the Aftermath crash dump
// generation progress if they detect a potential GPU crash, i.e., device
// removed/lost. For more details, see the description on how to set up an application
// for GPU crash dump collection at the beginning of this file.
//
/////////////////////////////////////////////////////////////////////////
GFSDK_AFTERMATH_DECLARE_ENUM(CrashDump_Status){
// No GPU crash has been detected by Aftermath, so far.
GFSDK_Aftermath_CrashDump_Status_NotStarted = 0,
// A GPU crash happened and was detected by Aftermath. Aftermath started to
// collect crash dump data.
GFSDK_Aftermath_CrashDump_Status_CollectingData,
// Aftermath failed to collect crash dump data. No further callback will be
// invoked.
GFSDK_Aftermath_CrashDump_Status_CollectingDataFailed,
// Aftermath is invoking the 'gpuCrashDumpCb' callback after collecting the crash
// dump data successfully.
GFSDK_Aftermath_CrashDump_Status_InvokingCallback,
// The 'gpuCrashDumpCb' callback returned and Aftermath finished processing the
// GPU crash. The application should now continue with handling the device
// removed/lost situation.
GFSDK_Aftermath_CrashDump_Status_Finished,
// Unknown problem - likely using an older driver incompatible with this
// Aftermath feature.
GFSDK_Aftermath_CrashDump_Status_Unknown,
};
/////////////////////////////////////////////////////////////////////////
// GFSDK_Aftermath_AddGpuCrashDumpDescription
// ---------------------------------
//
// Function for adding user-defined description key-value pairs used by
// 'GFSDK_Aftermath_GpuCrashDumpDescriptionCb'.
//
// Key must be one of the predefined keys of
// 'GFSDK_Aftermath_GpuCrashDumpDescriptionKey' or a user-defined key based on
// 'GFSDK_Aftermath_GpuCrashDumpDescriptionKey_UserDefined'. All keys greater than
// the last predefined key in 'GFSDK_Aftermath_GpuCrashDumpDescriptionKey' and
// smaller than 'GFSDK_Aftermath_GpuCrashDumpDescriptionKey_UserDefined' are
// considered illegal and ignored.
//
/////////////////////////////////////////////////////////////////////////
typedef void(GFSDK_AFTERMATH_CALL* PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription)(uint32_t key, const char* value);
/////////////////////////////////////////////////////////////////////////
// GFSDK_Aftermath_GpuCrashDumpCb
// ---------------------------------
//
// GPU crash dump callback.
//
// If registered via 'GFSDK_Aftermath_EnableGpuCrashDumps' it will be called with the
// crash dump data when a GPU crash is detected by Aftermath. See the description of
// 'GFSDK_Aftermath_EnableGpuCrashDumps' for more details.
//
// NOTE: Except for the 'pUserData' pointer, all pointer values passed to the
// callbacks are only valid for the duration of the call! An implementation
// must make copies of the data if it intends to store it beyond that.
//
/////////////////////////////////////////////////////////////////////////
typedef void(GFSDK_AFTERMATH_CALL* PFN_GFSDK_Aftermath_GpuCrashDumpCb)(const void* pGpuCrashDump, const uint32_t gpuCrashDumpSize, void* pUserData);
/////////////////////////////////////////////////////////////////////////
// GFSDK_Aftermath_ShaderDebugInfoCb
// ---------------------------------
//
// Shader debug information callback.
//
// If registered via 'GFSDK_Aftermath_EnableGpuCrashDumps' it will be called with
// shader debug information (line tables for mapping from the shader IL passed to the
// driver to the shader microcode) if the shader debug information generation feature
// is enabled: 'GFSDK_Aftermath_FeatureFlags_GenerateShaderDebugInfo' or
// 'VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_ERROR_REPORTING_BIT_NV'. Also see the
// description of 'GFSDK_Aftermath_EnableGpuCrashDumps' for more details.
//
// NOTE: Except for the 'pUserData' pointer, all pointer values passed to the
// callbacks are only valid for the duration of the call! An implementation
// must make copies of the data if it intends to store it beyond that.
//
/////////////////////////////////////////////////////////////////////////
typedef void(GFSDK_AFTERMATH_CALL* PFN_GFSDK_Aftermath_ShaderDebugInfoCb)(const void* pShaderDebugInfo, const uint32_t shaderDebugInfoSize, void* pUserData);
/////////////////////////////////////////////////////////////////////////
// GFSDK_Aftermath_GpuCrashDumpDescriptionCb
// ---------------------------------
//
// Crash dump description callback.
//
// If registered via 'GFSDK_Aftermath_EnableGpuCrashDumps' it will be called during
// GPU crash dump generation, i.e., before 'GFSDK_Aftermath_GpuCrashDumpCb' is called,
// and allows the application to provide additional information to be captured in the
// crash dump by calling the provided 'addValue' function. See the description of
// 'GFSDK_Aftermath_EnableGpuCrashDumps' for more details.
//
/////////////////////////////////////////////////////////////////////////
typedef void(GFSDK_AFTERMATH_CALL* PFN_GFSDK_Aftermath_GpuCrashDumpDescriptionCb)(PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription addValue, void* pUserData);
/////////////////////////////////////////////////////////////////////////
// GFSDK_Aftermath_ResolveMarkerCb
// ---------------------------------
//
// Marker data resolution callback.
//
// If registered via 'GFSDK_Aftermath_EnableGpuCrashDumps' it will be called during
// GPU crash dump data generation, i.e., before 'GFSDK_Aftermath_GpuCrashDumpCb' is
// called, when a DX event marker or a Vulkan checkpoint will be recorded into the
// crash dump. See the description of 'GFSDK_Aftermath_EnableGpuCrashDumps' for more details.
//
// NOTE: Except for the 'pUserData' pointer, all pointer values passed to the
// callbacks are only valid for the duration of the call! The application must ensure
// that the pointer returned through 'ppResolvedMarkerData' is valid after returning
// from the callback. Then, the GPU crash dump data collection process will make an
// internal copy of it. So, it's safe to reuse the memory in the next call, i.e., it's
// OK to store the memory blob where the 'ppResolvedMarkerData' is pointed to in static memory.
//
/////////////////////////////////////////////////////////////////////////
typedef void(GFSDK_AFTERMATH_CALL* PFN_GFSDK_Aftermath_ResolveMarkerCb)(const void* pMarkerData, const uint32_t markerDataSize, void* pUserData, void** ppResolvedMarkerData, uint32_t* pResolvedMarkerDataSize);
/////////////////////////////////////////////////////////////////////////
// GFSDK_Aftermath_EnableGpuCrashDumps
// ---------------------------------
//
// apiVersion;
// Must be set to 'GFSDK_Aftermath_Version_API'. Used for checking against
// library version.
//
// watchedApis;
// Controls which graphics APIs to watch for crashes. A combination of
// 'GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags'.
//
// flags;
// Controls GPU crash dump specific behavior. A combination of
// 'GFSDK_Aftermath_GpuCrashDumpFeatureFlags'.
//
// gpuCrashDumpCb;
// Callback function to be called when new GPU crash dump data is available.
//
// NOTE: This callback is free-threaded, ensure the provided function is
// thread-safe.
//
// shaderDebugInfoCb;
// Optional, can be NULL.
//
// Callback function to be called when new shader debug information data is
// available. Shader debug information generation needs to be enabled by
// setting the corresponding feature flags:
// * For DX: 'GFSDK_Aftermath_FeatureFlags_GenerateShaderDebugInfo'
// * For Vulkan: 'VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_ERROR_REPORTING_BIT_NV'
//
// NOTE: Shader debug information is only supported for DX12 (DXIL) and Vulkan
// (SPIR-V) shaders.
//
// NOTE: If not using 'GFSDK_Aftermath_GpuCrashDumpFeatureFlags_DeferDebugInfoCallbacks',
// 'shaderDebugInfoCb' will be invoked for every shader compilation by the
// graphics driver, even if there will be never an invocation of 'gpuCrashDumpCb'.
//
// NOTE: This callback is free-threaded, ensure the provided function is
// thread-safe.
//
// descriptionCb;
// Optional, can be NULL.
//
// Callback function that allows the application to provide additional
// descriptive values to be include in crash dumps. This will be called before
// 'gpuCrashDumpCb'.
//
// NOTE: This callback is free-threaded, ensure the provided function is
// thread-safe.
//
// resolveMarkerCb;
// Optional, can be NULL.
//
// Callback function to be called when the crash dump data generation encounters
// an event marker with a size of zero. This means that
// 'GFSDK_Aftermath_SetEventMarker' was called with 'markerDataSize = 0', meaning
// that the marker payload itself is managed by the application rather than
// copied by Aftermath internally. All Vulkan markers set using the
// 'NV_device_diagnostic_checkpoints' extension are application-managed as well.
// This callback allows the application to pass the marker's associated data
// back to the crash dump generation process to be included in the crash dump
// data. The application should set the value of 'ppResolvedMarkerData' to the
// pointer of the marker's data, and set the value of 'markerSize' to the size
// of the marker's data in bytes.
//
// NOTE: Applications must ensure that the marker data memory passed back via
// 'ppResolvedMarkerData' will remain valid for the entirety of the crash dump
// generation process, i.e., until 'gpuCrashDumpCb' is called.
//
// NOTE: This callback is only supported on R495 or later NVIDIA graphics drivers. If
// the application is running on a system using an earlier driver version, it will
// be ignored.
//
// NOTE: This callback is free-threaded, ensure the provided function is
// thread-safe.
//
// pUserData;
// Optional, can be NULL.
//
// User data pointer passed to the callbacks.
//
//// DESCRIPTION;
// Device independent initialization call to enable Aftermath GPU crash dump
// creation. This function must be called before any D3D or Vulkan device is
// created by the application.
//
// NOTE: This overrides any settings from an also active GPU crash dump monitor
// for this process!
//
/////////////////////////////////////////////////////////////////////////
GFSDK_Aftermath_API GFSDK_Aftermath_EnableGpuCrashDumps(
GFSDK_Aftermath_Version apiVersion,
uint32_t watchedApis,
uint32_t flags,
PFN_GFSDK_Aftermath_GpuCrashDumpCb gpuCrashDumpCb,
PFN_GFSDK_Aftermath_ShaderDebugInfoCb shaderDebugInfoCb,
PFN_GFSDK_Aftermath_GpuCrashDumpDescriptionCb descriptionCb,
PFN_GFSDK_Aftermath_ResolveMarkerCb resolveMarkerCb,
void* pUserData);
/////////////////////////////////////////////////////////////////////////
// GFSDK_Aftermath_DisableGpuCrashDumps
// ---------------------------------
//
//// DESCRIPTION;
// Device independent call to disable Aftermath GPU crash dump creation.
// Re-enables settings from an also active GPU crash dump monitor for the
// current process!
//
/////////////////////////////////////////////////////////////////////////
GFSDK_Aftermath_API GFSDK_Aftermath_DisableGpuCrashDumps();
/////////////////////////////////////////////////////////////////////////
// GFSDK_Aftermath_GetCrashDumpStatus
// ---------------------------------
//
// pOutStatus;
// OUTPUT: Crash dump status.
//
//// DESCRIPTION;
// If the application detects a potential crash (i.e., device
// removed/lost event), applications are expected to check the status of
// the Aftermath crash dump generation progress. This function allows to
// query the status of the GPU crash detection and the crash dump data
// collection. For more details, see 'GFSDK_Aftermath_CrashDump_Status'
// and the description of how to set up an application for GPU crash dump
// collection at the beginning of this file.
//
/////////////////////////////////////////////////////////////////////////
GFSDK_Aftermath_API GFSDK_Aftermath_GetCrashDumpStatus(GFSDK_Aftermath_CrashDump_Status* pOutStatus);
/////////////////////////////////////////////////////////////////////////
//
// Function pointer definitions - if dynamic loading is preferred.
//
/////////////////////////////////////////////////////////////////////////
GFSDK_Aftermath_PFN(GFSDK_AFTERMATH_CALL* PFN_GFSDK_Aftermath_EnableGpuCrashDumps)(GFSDK_Aftermath_Version apiVersion, uint32_t watchedApis, uint32_t flags, PFN_GFSDK_Aftermath_GpuCrashDumpCb gpuCrashDumpCb, PFN_GFSDK_Aftermath_ShaderDebugInfoCb shaderDebugInfoCb, PFN_GFSDK_Aftermath_GpuCrashDumpDescriptionCb descriptionCb, PFN_GFSDK_Aftermath_ResolveMarkerCb resolveMarkerCb, void* pUserData);
GFSDK_Aftermath_PFN(GFSDK_AFTERMATH_CALL* PFN_GFSDK_Aftermath_DisableGpuCrashDumps)();
GFSDK_Aftermath_PFN(GFSDK_AFTERMATH_CALL* PFN_GFSDK_Aftermath_GetCrashDumpStatus)(GFSDK_Aftermath_CrashDump_Status* pOutStatus);
#ifdef __cplusplus
} // extern "C"
#endif
#pragma pack(pop)
#endif // GFSDK_Aftermath_GpuCrashDump_H