// Copyright Epic Games, Inc. All Rights Reserved.

/*=============================================================================
	RHIGPUReadback.h: classes for managing fences and staging buffers for
	asynchronous GPU memory updates and readbacks with minimal stalls and no
	RHI thread flushes.
=============================================================================*/
|
|
|
|
#pragma once
|
|
|
|
#include "RHI.h"
|
|
#include "DynamicRHI.h"
|
|
#include "RHICommandList.h"
|
|
|
|
/**
|
|
* FRHIGPUMemoryReadback: Represents a memory readback request scheduled with CopyToStagingBuffer
|
|
* Wraps a staging buffer with a FRHIGPUFence for synchronization.
|
|
*/
|
|
class FRHIGPUMemoryReadback
|
|
{
|
|
public:
|
|
|
|
FRHIGPUMemoryReadback(FName RequestName)
|
|
{
|
|
Fence = RHICreateGPUFence(RequestName);
|
|
LastLockGPUIndex = 0;
|
|
}
|
|
|
|
virtual ~FRHIGPUMemoryReadback() {}
|
|
|
|
/** Indicates if the data is in place and ready to be read. */
|
|
FORCEINLINE bool IsReady()
|
|
{
|
|
return !Fence || (Fence->NumPendingWriteCommands.GetValue() == 0 && Fence->Poll());
|
|
}
|
|
|
|
/** Indicates if the data is in place and ready to be read on a subset of GPUs. */
|
|
FORCEINLINE bool IsReady(FRHIGPUMask GPUMask)
|
|
{
|
|
return !Fence || Fence->Poll(GPUMask);
|
|
}
|
|
|
|
void Wait(FRHICommandListImmediate& RHICmdList, FRHIGPUMask GPUMask) const
|
|
{
|
|
if (Fence)
|
|
{
|
|
Fence->Wait(RHICmdList, GPUMask);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Copy the current state of the resource to the readback data.
|
|
* @param RHICmdList The command list to enqueue the copy request on.
|
|
* @param SourceBuffer The buffer holding the source data.
|
|
* @param NumBytes The number of bytes to copy. If 0, this will copy the entire buffer.
|
|
*/
|
|
virtual void EnqueueCopy(FRHICommandList& RHICmdList, FRHIBuffer* SourceBuffer, uint32 NumBytes = 0)
|
|
{
|
|
unimplemented();
|
|
}
|
|
|
|
virtual void EnqueueCopy(FRHICommandList& RHICmdList, FRHITexture* SourceTexture, const FIntVector& SourcePosition, uint32 SourceSlice, const FIntVector& Size)
|
|
{
|
|
unimplemented();
|
|
}
|
|
|
|
void EnqueueCopy(FRHICommandList& RHICmdList, FRHITexture* SourceTexture, FResolveRect Rect = FResolveRect())
|
|
{
|
|
FIntVector SourcePosition, Size;
|
|
|
|
if (Rect.IsValid())
|
|
{
|
|
SourcePosition = FIntVector(Rect.X1, Rect.Y1, 0);
|
|
Size = FIntVector(Rect.X2 - Rect.X1, Rect.Y2 - Rect.Y1, 1);
|
|
}
|
|
else
|
|
{
|
|
SourcePosition = FIntVector::ZeroValue;
|
|
Size = FIntVector::ZeroValue;
|
|
}
|
|
|
|
EnqueueCopy(RHICmdList, SourceTexture, SourcePosition, 0, Size);
|
|
}
|
|
|
|
/**
|
|
* Returns the CPU accessible pointer that backs this staging buffer.
|
|
* @param NumBytes The maximum number of bytes the host will read from this pointer.
|
|
* @returns A CPU accessible pointer to the backing buffer.
|
|
*/
|
|
virtual void* Lock(uint32 NumBytes) = 0;
|
|
|
|
/**
|
|
* Signals that the host is finished reading from the backing buffer.
|
|
*/
|
|
virtual void Unlock() = 0;
|
|
|
|
FORCEINLINE const FRHIGPUMask& GetLastCopyGPUMask() const { return LastCopyGPUMask; }
|
|
|
|
FName GetName() const { return Fence->GetFName(); }
|
|
|
|
protected:
|
|
|
|
FGPUFenceRHIRef Fence;
|
|
FRHIGPUMask LastCopyGPUMask;
|
|
|
|
// We need to separately track which GPU buffer was last locked. It's possible for a new copy operation to
|
|
// be enqueued (writing to LastCopyGPUMask) while the buffer is technically locked, with the unlock and enqueued
|
|
// copy on the GPU itself happening later, during pass execution in FRDGBuilder::Execute (for example, this
|
|
// happens with Nanite streaming). It's not unsafe, because the operations are occurring in order on both the
|
|
// render thread and later pass Execute, but our locking logic needs to handle that scenario.
|
|
uint32 LastLockGPUIndex;
|
|
};
|
|
|
|
/** Buffer readback implementation. */
|
|
class FRHIGPUBufferReadback final : public FRHIGPUMemoryReadback
|
|
{
|
|
public:
|
|
|
|
RHI_API FRHIGPUBufferReadback(FName RequestName);
|
|
|
|
RHI_API void EnqueueCopy(FRHICommandList& RHICmdList, FRHIBuffer* SourceBuffer, uint32 NumBytes = 0) override;
|
|
RHI_API void* Lock(uint32 NumBytes) override;
|
|
RHI_API void Unlock() override;
|
|
RHI_API uint64 GetGPUSizeBytes() const;
|
|
|
|
private:
|
|
|
|
// RHI staging buffers are single GPU -- need to be branched when using multiple GPUs
|
|
#if WITH_MGPU
|
|
FStagingBufferRHIRef DestinationStagingBuffers[MAX_NUM_GPUS];
|
|
#else
|
|
FStagingBufferRHIRef DestinationStagingBuffers[1];
|
|
#endif
|
|
};
|
|
|
|
|
|
/** Texture readback implementation. */
|
|
class FRHIGPUTextureReadback final : public FRHIGPUMemoryReadback
|
|
{
|
|
public:
|
|
RHI_API FRHIGPUTextureReadback(FName RequestName);
|
|
|
|
using FRHIGPUMemoryReadback::EnqueueCopy;
|
|
|
|
RHI_API void EnqueueCopy(FRHICommandList& RHICmdList, FRHITexture* SourceTexture, const FIntVector& SourcePosition, uint32 SourceSlice, const FIntVector& Size) override;
|
|
|
|
UE_DEPRECATED(5.0, "Use FRHIGPUTextureReadback::Lock( int32& OutRowPitchInPixels) instead.")
|
|
RHI_API void* Lock(uint32 NumBytes) override;
|
|
|
|
RHI_API void* Lock(int32& OutRowPitchInPixels, int32* OutBufferHeight = nullptr);
|
|
RHI_API void Unlock() override;
|
|
|
|
UE_DEPRECATED(5.0, "Use FRHIGPUTextureReadback::Lock( int32& OutRowPitchInPixels) instead.")
|
|
RHI_API void LockTexture(FRHICommandListImmediate& RHICmdList, void*& OutBufferPtr, int32& OutRowPitchInPixels);
|
|
|
|
RHI_API uint64 GetGPUSizeBytes() const;
|
|
|
|
#if WITH_MGPU
|
|
FTextureRHIRef DestinationStagingTextures[MAX_NUM_GPUS];
|
|
#else
|
|
FTextureRHIRef DestinationStagingTextures[1];
|
|
#endif
|
|
};
|