Files
UnrealEngine/Engine/Source/Developer/MeshBuilderCommon/Private/ThirdPartyBuildOptimizationHelper.cpp
2025-05-18 13:04:45 +08:00

371 lines
10 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#include "ThirdPartyBuildOptimizationHelper.h"
#include "CoreMinimal.h"
#include "CoreTypes.h"
#include "NvTriStrip.h"
#include "forsythtriangleorderoptimizer.h"
#ifndef _SILENCE_STDEXT_HASH_DEPRECATION_WARNINGS
#define _SILENCE_STDEXT_HASH_DEPRECATION_WARNINGS 1
#endif
#include "nvtess.h"
namespace BuildOptimizationThirdParty
{
// CVars
static TAutoConsoleVariable<int32> CVarTriangleOrderOptimization(
TEXT("r.TriangleOrderOptimization"),
1,
TEXT("Controls the algorithm to use when optimizing the triangle order for the post-transform cache.\n")
TEXT("0: Use NVTriStrip (slower)\n")
TEXT("1: Use Forsyth algorithm (fastest)(default)")
TEXT("2: No triangle order optimization. (least efficient, debugging purposes only)"),
ECVF_Default);
namespace NvTriStripHelper
{
/**
* Converts 16 bit indices to 32 bit prior to passing them into the real GenerateStrips util method
*/
void GenerateStrips(
const uint8* Indices,
bool Is32Bit,
const uint32 NumIndices,
PrimitiveGroup** PrimGroups,
uint32* NumGroups
)
{
if (Is32Bit)
{
GenerateStrips((uint32*)Indices, NumIndices, PrimGroups, NumGroups);
}
else
{
// convert to 32 bit
uint32 Idx;
TArray<uint32> NewIndices;
NewIndices.AddUninitialized(NumIndices);
for (Idx = 0; Idx < NumIndices; ++Idx)
{
NewIndices[Idx] = ((uint16*)Indices)[Idx];
}
GenerateStrips(NewIndices.GetData(), NumIndices, PrimGroups, NumGroups);
}
}
/**
* Orders a triangle list for better vertex cache coherency.
*
* *** WARNING: This is safe to call for multiple threads IF AND ONLY IF all
* threads call SetListsOnly(true) and SetCacheSize(CACHESIZE_GEFORCE3). If
* NvTriStrip is ever used with different settings the library will need
* some modifications to be thread-safe. ***
*/
template<typename IndexDataType, typename Allocator>
void CacheOptimizeIndexBuffer(TArray<IndexDataType, Allocator>& Indices)
{
static_assert(sizeof(IndexDataType) == 2 || sizeof(IndexDataType) == 4, "Indices must be short or int.");
PrimitiveGroup* PrimitiveGroups = NULL;
uint32 NumPrimitiveGroups = 0;
bool Is32Bit = sizeof(IndexDataType) == 4;
SetListsOnly(true);
SetCacheSize(CACHESIZE_GEFORCE3);
GenerateStrips((uint8*)Indices.GetData(), Is32Bit, Indices.Num(), &PrimitiveGroups, &NumPrimitiveGroups);
Indices.Empty();
Indices.AddUninitialized(PrimitiveGroups->numIndices);
if (Is32Bit)
{
FMemory::Memcpy(Indices.GetData(), PrimitiveGroups->indices, Indices.Num() * sizeof(IndexDataType));
}
else
{
for (uint32 I = 0; I < PrimitiveGroups->numIndices; ++I)
{
Indices[I] = (uint16)PrimitiveGroups->indices[I];
}
}
delete[] PrimitiveGroups;
}
/**
* Provides skeletal mesh render data to the NVIDIA tessellation library.
*/
class FSkeletalMeshNvRenderBuffer : public nv::RenderBuffer
{
public:
/** Construct from static mesh render buffers. */
FSkeletalMeshNvRenderBuffer(
const TArray<FSoftSkinVertex>& InVertexBuffer,
const uint32 InTexCoordCount,
const TArray<uint32>& Indices)
: VertexBuffer(InVertexBuffer)
, TexCoordCount(InTexCoordCount)
{
mIb = new nv::IndexBuffer((void*)Indices.GetData(), nv::IBT_U32, Indices.Num(), false);
}
/** Retrieve the position and first texture coordinate of the specified index. */
virtual nv::Vertex getVertex(unsigned int Index) const
{
nv::Vertex Vertex;
check(Index < (unsigned int)VertexBuffer.Num());
const FSoftSkinVertex& SrcVertex = VertexBuffer[Index];
Vertex.pos.x = SrcVertex.Position.X;
Vertex.pos.y = SrcVertex.Position.Y;
Vertex.pos.z = SrcVertex.Position.Z;
if (TexCoordCount > 0)
{
Vertex.uv.x = SrcVertex.UVs[0].X;
Vertex.uv.y = SrcVertex.UVs[0].Y;
}
else
{
Vertex.uv.x = 0.0f;
Vertex.uv.y = 0.0f;
}
return Vertex;
}
private:
/** The vertex buffer for the skeletal mesh. */
const TArray<FSoftSkinVertex>& VertexBuffer;
const uint32 TexCoordCount;
/** Copying is forbidden. */
FSkeletalMeshNvRenderBuffer(const FSkeletalMeshNvRenderBuffer&);
FSkeletalMeshNvRenderBuffer& operator=(const FSkeletalMeshNvRenderBuffer&);
};
/**
* Provides static mesh render data to the NVIDIA tessellation library.
*/
class FStaticMeshNvRenderBuffer : public nv::RenderBuffer
{
public:
/** Construct from static mesh render buffers. */
FStaticMeshNvRenderBuffer(
const FPositionVertexBuffer& InPositionVertexBuffer,
const FStaticMeshVertexBuffer& InVertexBuffer,
const TArray<uint32>& Indices);
/** Retrieve the position and first texture coordinate of the specified index. */
virtual nv::Vertex getVertex(unsigned int Index) const;
private:
/** The position vertex buffer for the static mesh. */
const FPositionVertexBuffer& PositionVertexBuffer;
/** The vertex buffer for the static mesh. */
const FStaticMeshVertexBuffer& VertexBuffer;
/** Copying is forbidden. */
FStaticMeshNvRenderBuffer(const FStaticMeshNvRenderBuffer&);
FStaticMeshNvRenderBuffer& operator=(const FStaticMeshNvRenderBuffer&);
};
FStaticMeshNvRenderBuffer::FStaticMeshNvRenderBuffer(
const FPositionVertexBuffer& InPositionVertexBuffer,
const FStaticMeshVertexBuffer& InVertexBuffer,
const TArray<uint32>& Indices)
: PositionVertexBuffer(InPositionVertexBuffer)
, VertexBuffer(InVertexBuffer)
{
check(PositionVertexBuffer.GetNumVertices() == VertexBuffer.GetNumVertices());
mIb = new nv::IndexBuffer((void*)Indices.GetData(), nv::IBT_U32, Indices.Num(), false);
}
nv::Vertex FStaticMeshNvRenderBuffer::getVertex(unsigned int Index) const
{
nv::Vertex Vertex;
check(Index < PositionVertexBuffer.GetNumVertices());
const FVector3f& Position = PositionVertexBuffer.VertexPosition(Index);
Vertex.pos.x = Position.X;
Vertex.pos.y = Position.Y;
Vertex.pos.z = Position.Z;
if (VertexBuffer.GetNumTexCoords())
{
const FVector2f UV = VertexBuffer.GetVertexUV(Index, 0);
Vertex.uv.x = UV.X;
Vertex.uv.y = UV.Y;
}
else
{
Vertex.uv.x = 0.0f;
Vertex.uv.y = 0.0f;
}
return Vertex;
}
void BuildStaticAdjacencyIndexBuffer(
const FPositionVertexBuffer& PositionVertexBuffer,
const FStaticMeshVertexBuffer& VertexBuffer,
const TArray<uint32>& Indices,
TArray<uint32>& OutPnAenIndices
)
{
if (Indices.Num())
{
FStaticMeshNvRenderBuffer StaticMeshRenderBuffer(PositionVertexBuffer, VertexBuffer, Indices);
nv::IndexBuffer* PnAENIndexBuffer = nv::tess::buildTessellationBuffer(&StaticMeshRenderBuffer, nv::DBM_PnAenDominantCorner, true);
check(PnAENIndexBuffer);
const int32 IndexCount = (int32)PnAENIndexBuffer->getLength();
OutPnAenIndices.Empty(IndexCount);
OutPnAenIndices.AddUninitialized(IndexCount);
for (int32 Index = 0; Index < IndexCount; ++Index)
{
OutPnAenIndices[Index] = (*PnAENIndexBuffer)[Index];
}
delete PnAENIndexBuffer;
}
else
{
OutPnAenIndices.Empty();
}
}
void BuildSkeletalAdjacencyIndexBuffer(
const TArray<FSoftSkinVertex>& VertexBuffer,
const uint32 TexCoordCount,
const TArray<uint32>& Indices,
TArray<uint32>& OutPnAenIndices
)
{
if (Indices.Num())
{
FSkeletalMeshNvRenderBuffer SkeletalMeshRenderBuffer(VertexBuffer, TexCoordCount, Indices);
nv::IndexBuffer* PnAENIndexBuffer = nv::tess::buildTessellationBuffer(&SkeletalMeshRenderBuffer, nv::DBM_PnAenDominantCorner, true);
check(PnAENIndexBuffer);
const int32 IndexCount = (int32)PnAENIndexBuffer->getLength();
OutPnAenIndices.Empty(IndexCount);
OutPnAenIndices.AddUninitialized(IndexCount);
for (int32 Index = 0; Index < IndexCount; ++Index)
{
OutPnAenIndices[Index] = (*PnAENIndexBuffer)[Index];
}
delete PnAENIndexBuffer;
}
else
{
OutPnAenIndices.Empty();
}
}
}
namespace ForsythHelper
{
void OptimizeFaces(
const uint8* Indices,
bool Is32Bit,
const uint32 NumIndices,
uint32 NumVertices,
uint32* OutIndices,
uint16 CacheSize
)
{
if (Is32Bit)
{
Forsyth::OptimizeFaces((uint32*)Indices, NumIndices, NumVertices, OutIndices, CacheSize);
}
else
{
// convert to 32 bit
uint32 Idx;
TArray<uint32> NewIndices;
NewIndices.AddUninitialized(NumIndices);
for (Idx = 0; Idx < NumIndices; ++Idx)
{
NewIndices[Idx] = ((uint16*)Indices)[Idx];
}
Forsyth::OptimizeFaces(NewIndices.GetData(), NumIndices, NumVertices, OutIndices, CacheSize);
}
}
template<typename IndexDataType, typename Allocator>
void CacheOptimizeIndexBuffer(TArray<IndexDataType, Allocator>& Indices)
{
static_assert(sizeof(IndexDataType) == 2 || sizeof(IndexDataType) == 4, "Indices must be short or int.");
bool Is32Bit = sizeof(IndexDataType) == 4;
// Count the number of vertices
uint32 NumVertices = 0;
for (int32 Index = 0; Index < Indices.Num(); ++Index)
{
if (Indices[Index] > NumVertices)
{
NumVertices = Indices[Index];
}
}
NumVertices += 1;
TArray<uint32> OptimizedIndices;
OptimizedIndices.AddUninitialized(Indices.Num());
uint16 CacheSize = 32;
OptimizeFaces((uint8*)Indices.GetData(), Is32Bit, Indices.Num(), NumVertices, OptimizedIndices.GetData(), CacheSize);
if (Is32Bit)
{
FMemory::Memcpy(Indices.GetData(), OptimizedIndices.GetData(), Indices.Num() * sizeof(IndexDataType));
}
else
{
for (int32 I = 0; I < OptimizedIndices.Num(); ++I)
{
Indices[I] = (uint16)OptimizedIndices[I];
}
}
}
}
void CacheOptimizeIndexBuffer(TArray<uint16>& Indices)
{
bool bDisableTriangleOrderOptimization = (CVarTriangleOrderOptimization.GetValueOnAnyThread(true) == 2);
bool bUsingNvTriStrip = !bDisableTriangleOrderOptimization && (CVarTriangleOrderOptimization.GetValueOnAnyThread(true) == 0);
if (bUsingNvTriStrip)
{
NvTriStripHelper::CacheOptimizeIndexBuffer(Indices);
}
else if (!bDisableTriangleOrderOptimization)
{
ForsythHelper::CacheOptimizeIndexBuffer(Indices);
}
}
void CacheOptimizeIndexBuffer(TArray<uint32>& Indices)
{
bool bDisableTriangleOrderOptimization = (CVarTriangleOrderOptimization.GetValueOnAnyThread(true) == 2);
bool bUsingNvTriStrip = !bDisableTriangleOrderOptimization && (CVarTriangleOrderOptimization.GetValueOnAnyThread(true) == 0);
if (bUsingNvTriStrip)
{
NvTriStripHelper::CacheOptimizeIndexBuffer(Indices);
}
else if (!bDisableTriangleOrderOptimization)
{
ForsythHelper::CacheOptimizeIndexBuffer(Indices);
}
}
}