Files
UnrealEngine/Engine/Plugins/Experimental/NeuralRendering/Source/NeuralPostProcessing/Private/NeuralPostProcessModelInstance.cpp
2025-05-18 13:04:45 +08:00

280 lines
11 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#include "NeuralPostProcessModelInstance.h"
#include "NNERuntimeRDG.h"
#include "RenderGraphUtils.h"
#include "Engine/NeuralProfile.h"
/**
 * Builds an RDG model instance for the given model data on the named NNE runtime.
 *
 * @param NNEModelData  Model asset to instantiate; must not be null (checked).
 * @param RuntimeName   Name used to look up the runtime, both as INNERuntime and as INNERuntimeRDG.
 * @return The instance returned by IModelRDG::CreateModelInstanceRDG(), or null when either runtime
 *         lookup fails or CreateModelRDG() does not return a valid model.
 *
 * Failure reasons are only logged in editor builds (WITH_EDITOR).
 */
TSharedPtr<UE::NNE::IModelInstanceRDG> CreateNNEModelInstance(UNNEModelData* NNEModelData, FString RuntimeName)
{
	check(NNEModelData);

	// Both lookups are performed up front; each is validated separately below.
	TWeakInterfacePtr<INNERuntime> BaseRuntime = UE::NNE::GetRuntime<INNERuntime>(RuntimeName);
	TWeakInterfacePtr<INNERuntimeRDG> RDGRuntime = UE::NNE::GetRuntime<INNERuntimeRDG>(RuntimeName);

	if (!BaseRuntime.IsValid())
	{
#if WITH_EDITOR
		UE_LOG(LogNeuralPostProcessing, Error, TEXT("Can't get %s runtime."), *RuntimeName);
#endif
		return nullptr;
	}

	if (!RDGRuntime.IsValid())
	{
#if WITH_EDITOR
		UE_LOG(LogNeuralPostProcessing, Error, TEXT("No RDG runtime '%s' found"), *RuntimeName);
#endif
		return nullptr;
	}

	TSharedPtr<UE::NNE::IModelRDG> Model = RDGRuntime->CreateModelRDG(NNEModelData);
	if (Model.IsValid())
	{
		return Model->CreateModelInstanceRDG();
	}

#if WITH_EDITOR
	UE_LOG(LogNeuralPostProcessing, Error, TEXT("CreateModelRDG failed for Model = %s, Runtime = %s"), *NNEModelData->GetName(), *RuntimeName);
#endif
	return nullptr;
}
/**
 * Constructs the instance with no RDG buffers and a single-dispatch, one-by-one tile setup.
 * No NNE model is created here; that happens in Update() / CreateDefaultNNEModel().
 */
UNeuralPostProcessModelInstance::UNeuralPostProcessModelInstance(FObjectInitializer const& ObjectInitializer)
	: Super(ObjectInitializer)
{
	// Tiling / dispatch defaults: one tile, one dispatch, no overlap.
	ModelTileSize = ENeuralModelTileType::OneByOne;
	DispatchSize = 1;
	TileOverlap = FIntPoint(0);
	// NOTE(review): -1 presumably means "no override" for every dimension - confirm against the header.
	DimensionOverride = FIntVector4(-1);

	// RDG buffers start out null until CreateRDGBuffers() assigns them.
	RDGInputBuffer = nullptr;
	RDGOutputBuffer = nullptr;
	RDGTiledInputBuffer = nullptr;
	RDGTiledOutputBuffer = nullptr;
}
/**
 * (Re)creates the NNE model instance from the given model data and runtime name.
 * Simply forwards to CreateDefaultNNEModel().
 */
void UNeuralPostProcessModelInstance::Update(UNNEModelData* NNEModelData, FString RuntimeName)
{
	CreateDefaultNNEModel(NNEModelData, RuntimeName);
}
/**
 * Records the RDG passes that run the model on the current buffers.
 *
 * - DispatchSize == 1: the model is enqueued once on RDGInputBuffer / RDGOutputBuffer, followed by a
 *   GPU flush pass.
 * - DispatchSize  > 1: for every dispatch index, one slice of RDGTiledInputBuffer is copied into
 *   RDGInputBuffer, the model is enqueued, and RDGOutputBuffer is copied into the matching slice of
 *   RDGTiledOutputBuffer. Only the last dispatch is followed by the GPU flush pass.
 * - Any other DispatchSize records nothing.
 *
 * Expects ModelInstanceRDG and the RDG buffers to have been created beforehand
 * (see CreateDefaultNNEModel() and CreateRDGBuffers()).
 *
 * @param GraphBuilder  Render graph the passes are added to.
 */
void UNeuralPostProcessModelInstance::Execute(FRDGBuilder& GraphBuilder)
{
	TArray<UE::NNE::FTensorBindingRDG> InputBindings;
	TArray<UE::NNE::FTensorBindingRDG> OutputBindings;
	UE::NNE::FTensorBindingRDG& Input = InputBindings.Emplace_GetRef();
	UE::NNE::FTensorBindingRDG& Output = OutputBindings.Emplace_GetRef();

	auto RunSingleDispatch = [&](bool bAddFence = true)
	{
		Input.Buffer = RDGInputBuffer;
		Output.Buffer = RDGOutputBuffer;

		UE::NNE::IModelInstanceRDG::EEnqueueRDGStatus Status = ModelInstanceRDG->EnqueueRDG(GraphBuilder, InputBindings, OutputBindings);
		checkf(Status == UE::NNE::IModelInstanceRDG::EEnqueueRDGStatus::Ok, TEXT("EnqueueRDG failed: %d"), static_cast<int>(Status));

		if (bAddFence)
		{
			// The pass does not touch any member state, so nothing is captured.
			AddPass(GraphBuilder, RDG_EVENT_NAME("EnqueueRDGFence"), [](FRHICommandListImmediate& RHICmdList)
			{
				// Need to wait until the output buffer, RDGOutputBuffer, is ready.
				RHICmdList.SubmitCommandsAndFlushGPU();
				RHICmdList.BlockUntilGPUIdle();
			});
		}
	};

	if (DispatchSize == 1)
	{
		RunSingleDispatch();
	}
	else if (DispatchSize > 1)
	{
		// Since we have more tiles than batch per dispatch we need to copy the data from tiled
		// to and from the buffer.
		const int32 NumOfDispatch = DispatchSize;

		// Sizes and offsets are kept in 64 bits: the copy pass takes 64-bit byte offsets, and
		// "size * index" must not be evaluated in 32-bit signed arithmetic.
		const uint64 InputBufferSize = RDGInputBuffer->Desc.GetSize();
		const uint64 OutputBufferSize = RDGOutputBuffer->Desc.GetSize();

		for (int32 i = 0; i < NumOfDispatch; ++i)
		{
			const uint64 DispatchIndex = static_cast<uint64>(i);
			// Only the final dispatch is followed by the GPU flush pass.
			const bool bAddFence = (i == NumOfDispatch - 1);

			// Tiled input slice -> per-dispatch input buffer.
			AddCopyBufferPass(GraphBuilder, RDGInputBuffer, 0, RDGTiledInputBuffer, InputBufferSize * DispatchIndex, InputBufferSize);
			RunSingleDispatch(bAddFence);
			// Per-dispatch output buffer -> tiled output slice.
			AddCopyBufferPass(GraphBuilder, RDGTiledOutputBuffer, OutputBufferSize * DispatchIndex, RDGOutputBuffer, 0, OutputBufferSize);
		}
	}
}
/** Returns a copy of the concrete input tensor shape currently stored on this instance. */
UE::NNE::FTensorShape UNeuralPostProcessModelInstance::GetResolvedInputTensorShape()
{
	return this->ResolvedInputTensorShape;
}
/**
 * Returns a copy of the output tensor shape currently stored on this instance.
 * The stored value is refreshed by ModifyInputShape() and CreateRDGBuffers().
 */
UE::NNE::FTensorShape UNeuralPostProcessModelInstance::GetResolvedOutputTensorShape()
{
	return this->ResolvedOutputTensorShape;
}
/** Returns the per-dispatch model input buffer (null until CreateRDGBuffers() has run). */
FRDGBufferRef UNeuralPostProcessModelInstance::GetInputBuffer()
{
	return this->RDGInputBuffer;
}
/** Returns the per-dispatch model output buffer (null until CreateRDGBuffers() has run). */
FRDGBufferRef UNeuralPostProcessModelInstance::GetOutputBuffer()
{
	return this->RDGOutputBuffer;
}
/**
 * Returns the input buffer covering all dispatches.
 * CreateRDGBuffers() makes this the same buffer as GetInputBuffer() when DispatchSize <= 1.
 */
FRDGBufferRef UNeuralPostProcessModelInstance::GetTiledInputBuffer()
{
	return this->RDGTiledInputBuffer;
}
/**
 * Returns the output buffer covering all dispatches.
 * CreateRDGBuffers() makes this the same buffer as GetOutputBuffer() when DispatchSize <= 1.
 */
FRDGBufferRef UNeuralPostProcessModelInstance::GetTiledOutputBuffer()
{
	return this->RDGTiledOutputBuffer;
}
bool UNeuralPostProcessModelInstance::ModifyInputShape(int Dim, int Size)
{
// setup inputs
TConstArrayView<UE::NNE::FTensorDesc> InputTensorDescs = ModelInstanceRDG->GetInputTensorDescs();
UE::NNE::FSymbolicTensorShape InputShapeTemplate = InputTensorDescs[0].GetShape();
checkf(InputShapeTemplate.Rank() == 4, TEXT("Post Processing requires models with input shape batch x channel x height x width!"));
// We can only modify dimensions supporting dynamic Size
UE::NNE::FTensorShape CurrentResolvedInputTensorShape = GetResolvedInputTensorShape();
uint32 Batch = (Dim == 0) && (InputShapeTemplate.GetData()[0] < 0) ? Size : CurrentResolvedInputTensorShape.GetData()[0];
uint32 Channel = (Dim == 1) && (InputShapeTemplate.GetData()[1] < 0) ? Size : CurrentResolvedInputTensorShape.GetData()[1];
uint32 Height = (Dim == 2) && (InputShapeTemplate.GetData()[2] < 0) ? Size : CurrentResolvedInputTensorShape.GetData()[2];
uint32 Width = (Dim == 3) && (InputShapeTemplate.GetData()[3] < 0) ? Size : CurrentResolvedInputTensorShape.GetData()[3];
TArray<uint32> NewResolvedInputShape = {Batch,Channel,Height,Width};
if (NewResolvedInputShape[Dim] != Size)
{
#if WITH_EDITOR
UE_LOG(LogNeuralPostProcessing, Error, TEXT("Cannot set dimension %d to %d. It is not dynamic. revert back to %d"),Dim, Size, NewResolvedInputShape[Dim]);
#endif
return false;
}
ResolvedInputTensorShape = UE::NNE::FTensorShape::Make(NewResolvedInputShape);
ModelInstanceRDG->SetInputTensorShapes({ ResolvedInputTensorShape });
// Update the output shape as well due to the change of the input tensor shape
TConstArrayView<UE::NNE::FTensorShape> ResolvedOutputTensorShapes = ModelInstanceRDG->GetOutputTensorShapes();
if (ResolvedOutputTensorShapes.Num() < 1)
{
#if WITH_EDITOR
UE_LOG(LogNeuralPostProcessing, Error, TEXT("Cannot set dimension %d to %d due to NNE eror. Revert back to %d"), Dim, Size, CurrentResolvedInputTensorShape.GetData()[Dim]);
#endif
ModelInstanceRDG->SetInputTensorShapes({ CurrentResolvedInputTensorShape });
return false;
}
ResolvedOutputTensorShape = ResolvedOutputTensorShapes[0];
return true;
}
/**
 * Creates the RDG buffers used to feed the model and receive its result.
 *
 * - RDGInputBuffer / RDGOutputBuffer hold one dispatch worth of float data, sized from the resolved
 *   input shape and the model's first output shape (batch * channels * height * width elements).
 * - When DispatchSize > 1, RDGTiledInputBuffer / RDGTiledOutputBuffer are DispatchSize times larger;
 *   otherwise they alias the per-dispatch buffers.
 *
 * Also refreshes ResolvedOutputTensorShape from the model instance. Expects ModelInstanceRDG to be
 * valid with its input shape already set.
 *
 * @param GraphBuilder  Render graph that owns the created buffers.
 */
void UNeuralPostProcessModelInstance::CreateRDGBuffers(class FRDGBuilder& GraphBuilder)
{
	// Create input buffers
	UE::NNE::FTensorShape InputShape = GetResolvedInputTensorShape();
	FIntPoint NeuralNetworkInputSize = { static_cast<int32>(InputShape.GetData()[3]), static_cast<int32>(InputShape.GetData()[2]) };
	int32 InputBatch = InputShape.GetData()[0];
	int32 InputChannels = InputShape.GetData()[1];

	FRDGBufferDesc InputBufferDesc = FRDGBufferDesc::CreateBufferDesc(sizeof(float), NeuralNetworkInputSize.X * NeuralNetworkInputSize.Y * InputChannels * InputBatch);
	RDGInputBuffer = GraphBuilder.CreateBuffer(InputBufferDesc, TEXT("NeuralPostProcessing.InputBuffer"));

	// Create output buffers
	checkf(ModelInstanceRDG->GetOutputTensorShapes().Num() >= 1, TEXT("Post Processing requires models with a single output tensor!"));
	ResolvedOutputTensorShape = ModelInstanceRDG->GetOutputTensorShapes()[0];
	checkf(ResolvedOutputTensorShape.Rank() == 4, TEXT("Neural Post Processing requires models dim = 4 [static/dynamic x C x height x width]!"));
	checkf(ResolvedOutputTensorShape.GetData()[0] != 0 , TEXT("Neural Post Processing requires models with output shape BatchSize != 0"));
	checkf(ResolvedOutputTensorShape.GetData()[1] >= 1, TEXT("Neural Post Processing requires models with output shape channel C >= 1!"));
	checkf(ResolvedOutputTensorShape.GetData()[2] > 0, TEXT("Neural Post Processing requires models with output height > 0!"));
	checkf(ResolvedOutputTensorShape.GetData()[3] > 0, TEXT("Neural Post Processing requires models with output width > 0!"));

	FIntPoint NeuralNetworkOutputSize = { static_cast<int32>(ResolvedOutputTensorShape.GetData()[3]), static_cast<int32>(ResolvedOutputTensorShape.GetData()[2]) };
	int32 OutputBatch = ResolvedOutputTensorShape.GetData()[0];
	int32 OutputChannels = ResolvedOutputTensorShape.GetData()[1];

	FRDGBufferDesc OutputBufferDesc = FRDGBufferDesc::CreateBufferDesc(sizeof(float),
		NeuralNetworkOutputSize.X * NeuralNetworkOutputSize.Y * OutputChannels * OutputBatch);
	// The debug name must be a string literal: a pointer into a temporary FString would be left
	// dangling as soon as this statement ends.
	RDGOutputBuffer = GraphBuilder.CreateBuffer(OutputBufferDesc, TEXT("NeuralPostProcessing.OutputBuffer"));

	if (DispatchSize <= 1)
	{
		// Single dispatch: no separate tiled storage is needed.
		RDGTiledInputBuffer = RDGInputBuffer;
		RDGTiledOutputBuffer = RDGOutputBuffer;
	}
	else
	{
		FRDGBufferDesc InputTiledBufferDesc = FRDGBufferDesc::CreateBufferDesc(sizeof(float),
			NeuralNetworkInputSize.X * NeuralNetworkInputSize.Y * InputChannels * InputBatch * DispatchSize);
		RDGTiledInputBuffer = GraphBuilder.CreateBuffer(InputTiledBufferDesc, TEXT("NeuralPostProcessing.TiledInputBuffer"));

		FRDGBufferDesc OutputTiledBufferDesc = FRDGBufferDesc::CreateBufferDesc(sizeof(float),
			NeuralNetworkOutputSize.X * NeuralNetworkOutputSize.Y * OutputChannels * OutputBatch * DispatchSize);
		RDGTiledOutputBuffer = GraphBuilder.CreateBuffer(OutputTiledBufferDesc, TEXT("NeuralPostProcessing.TiledOutputBuffer"));
	}
}
/**
 * Creates the RDG buffers when either the input or output buffer is missing, or unconditionally
 * when bForceCreate is set.
 *
 * @param GraphBuilder  Render graph that will own the buffers.
 * @param bForceCreate  Recreate the buffers even if both already exist.
 */
void UNeuralPostProcessModelInstance::CreateRDGBuffersIfNeeded(FRDGBuilder& GraphBuilder, bool bForceCreate)
{
	const bool bMissingBuffer = !RDGInputBuffer || !RDGOutputBuffer;
	if (bMissingBuffer || bForceCreate)
	{
		CreateRDGBuffers(GraphBuilder);
	}
}
/**
 * Creates the NNE RDG model instance and gives it a default concrete input shape.
 *
 * The model's first input must be rank 4 with a non-zero first dimension and a second dimension
 * >= 1 (enforced with checkf). The concrete shape is derived from the symbolic one via
 * FTensorShape::MakeFromSymbolic(). If the model rejects that shape, ModelInstanceRDG is reset.
 *
 * @param NNEModelData  Model asset to instantiate.
 * @param RuntimeName   Name of the NNE runtime to use.
 */
void UNeuralPostProcessModelInstance::CreateDefaultNNEModel(UNNEModelData* NNEModelData, FString RuntimeName)
{
	ModelInstanceRDG = CreateNNEModelInstance(NNEModelData, RuntimeName);
	if (!ModelInstanceRDG)
	{
#if WITH_EDITOR
		UE_LOG(LogNeuralPostProcessing, Error, TEXT("Failed to create NNE RDG Model."));
#endif
		return;
	}

	// Validate the layout of the first input tensor.
	TConstArrayView<UE::NNE::FTensorDesc> InputDescs = ModelInstanceRDG->GetInputTensorDescs();
	checkf(InputDescs.Num() >= 1, TEXT("Post Processing requires models with a single input tensor!"));

	UE::NNE::FSymbolicTensorShape SymbolicShape = InputDescs[0].GetShape();
	checkf(SymbolicShape.Rank() == 4, TEXT("Post Processing requires models with input shape 1 x N(N>=1) x height x width!"));
	checkf(SymbolicShape.GetData()[0] != 0, TEXT("Post Processing requires models with input shape ? x N(N>=1) x height x width!"));
	checkf(SymbolicShape.GetData()[1] >= 1, TEXT("Post Processing requires models with input shape ? x N(N>=1) x height x width!"));

	// Resolve the symbolic shape into a concrete one.
	// All dynamic dimensions are set to 1 by default.
	ResolvedInputTensorShape = UE::NNE::FTensorShape::MakeFromSymbolic(SymbolicShape);

	const bool bShapeAccepted =
		ModelInstanceRDG->SetInputTensorShapes({ ResolvedInputTensorShape }) == UE::NNE::IModelInstanceRDG::ESetInputTensorShapesStatus::Ok;
	if (!bShapeAccepted)
	{
		// Drop the unusable instance before reporting the failure.
		ModelInstanceRDG.Reset();
#if WITH_EDITOR
		UE_LOG(LogNeuralPostProcessing, Warning, TEXT("SetInputTensorShapes Failed for NNE RDG Model = %s, Runtime = %s"), *NNEModelData->GetName(), *RuntimeName);
#endif
	}
}