Files
UnrealEngine/Engine/Shaders/Private/Nanite/NaniteRasterizer.ush
2025-05-18 13:04:45 +08:00

300 lines
7.1 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once
// Per-triangle rasterization state. Filled in by SetupTriangle() and consumed by the RasterizeTri_* loops.
struct FRasterTri
{
// Inclusive pixel bounding rect of the triangle, clamped to the scissor rect and to at most 64 pixels per axis.
int2 MinPixel;
int2 MaxPixel;
// Edge vectors between the subpixel-space vertices (Edge01 = Vert0 - Vert1, and so on).
// All three are negated when a back-facing triangle is kept rather than culled.
float2 Edge01;
float2 Edge12;
float2 Edge20;
// Edge-function values at the center of MinPixel, after the fill-convention adjustment,
// scaled by 1 / SubpixelSamples so that they can be stepped per pixel with the unscaled edges.
float C0;
float C1;
float C2;
// x = Verts[0].z, yz = (Verts[1].z - Verts[0].z, Verts[2].z - Verts[0].z) * ScaleToUnit.
float3 DepthPlane;
// Copied from the .w components of the three input vertices.
// NOTE(review): the name suggests callers store 1/w in .w - confirm against the caller.
float3 InvW;
// Change of the normalized edge-function values (C * ScaleToUnit) per one-pixel step in x and in y.
float3 Barycentrics_dx;
float3 Barycentrics_dy;
// False when the triangle was back-face culled or its clamped bounding rect covers no pixels.
bool bIsValid;
// True when the signed area term DetXY computed in SetupTriangle() is >= 0.
bool bBackFace;
};
// Builds the FRasterTri used by the RasterizeTri_* functions from three vertices.
//
// SubpixelSamples	Number of subpixel units per pixel. The code relies on it being a power of two (see the C0/C1/C2 scaling below).
// bBackFaceCull	When true, back-facing triangles are marked invalid. When false, their edges are flipped so they rasterize like front faces.
// ScissorRect		xy = inclusive minimum pixel, zw = exclusive maximum pixel (zw - 1 is used as the inclusive bound).
// Verts			xy = position in subpixel units, z = depth, w = value copied into FRasterTri::InvW.
//
// Returns the filled-in FRasterTri. Callers are expected to check bIsValid before rasterizing.
template< uint SubpixelSamples, bool bBackFaceCull >
FRasterTri SetupTriangle( int4 ScissorRect, float4 Verts[3] )
{
FRasterTri Tri;
Tri.bIsValid = true;
Tri.InvW = float3( Verts[0].w, Verts[1].w, Verts[2].w );
// 16.8 fixed point
// NOTE(review): values are stored as floats here; the fixed point notes presumably describe the precision of the snapped inputs - confirm against the caller.
float2 Vert0 = Verts[0].xy;
float2 Vert1 = Verts[1].xy;
float2 Vert2 = Verts[2].xy;
// 4.8 fixed point
Tri.Edge01 = Vert0 - Vert1;
Tri.Edge12 = Vert1 - Vert2;
Tri.Edge20 = Vert2 - Vert0;
// Sign of the 2D cross product of two edges determines the facing. Zero area counts as back-facing.
float DetXY = Tri.Edge01.y * Tri.Edge20.x - Tri.Edge01.x * Tri.Edge20.y;
Tri.bBackFace = (DetXY >= 0.0f);
if( bBackFaceCull )
Tri.bIsValid = !Tri.bBackFace;
BRANCH
if( !bBackFaceCull && Tri.bBackFace )
{
// Swap winding order
Tri.Edge01 *= -1.0f;
Tri.Edge12 *= -1.0f;
Tri.Edge20 *= -1.0f;
}
// Bounding rect
const float2 MinSubpixel = min3( Vert0, Vert1, Vert2 );
const float2 MaxSubpixel = max3( Vert0, Vert1, Vert2 );
// Round to nearest pixel
// Only pixels whose centers can lie inside the subpixel bounds are kept.
Tri.MinPixel = (int2)floor( ( MinSubpixel + (SubpixelSamples / 2) - 1 ) * (1.0 / SubpixelSamples) );
Tri.MaxPixel = (int2)floor( ( MaxSubpixel - (SubpixelSamples / 2) - 1 ) * (1.0 / SubpixelSamples) ); // inclusive!
// Scissor
Tri.MinPixel = max( Tri.MinPixel, ScissorRect.xy );
Tri.MaxPixel = min( Tri.MaxPixel, ScissorRect.zw - 1 );
// Limit the rasterizer bounds to a sensible max.
// This caps the rect at 64 pixels per axis; anything beyond that is not rasterized.
Tri.MaxPixel = min( Tri.MaxPixel, Tri.MinPixel + 63 );
// Cull when no pixels covered
if( any( Tri.MinPixel > Tri.MaxPixel ) )
Tri.bIsValid = false;
// Rebase off MinPixel with half pixel offset
// 4.8 fixed point
// Max triangle size should only be 7x7 pixels. Not sure why this works for larger triangles.
// After this the origin is the center of MinPixel, so the half-edge constants below are the edge functions at that pixel center.
const float2 BaseSubpixel = (float2)Tri.MinPixel * SubpixelSamples + (SubpixelSamples / 2);
Vert0 -= BaseSubpixel;
Vert1 -= BaseSubpixel;
Vert2 -= BaseSubpixel;
// Half-edge constants
// 8.16 fixed point
Tri.C0 = Tri.Edge12.y * Vert1.x - Tri.Edge12.x * Vert1.y;
Tri.C1 = Tri.Edge20.y * Vert2.x - Tri.Edge20.x * Vert2.y;
Tri.C2 = Tri.Edge01.y * Vert0.x - Tri.Edge01.x * Vert0.y;
// Sum C before nudging for fill convention. Afterwards it could be zero.
// NOTE(review): for a zero-area triangle this sum is presumably zero as well, making ScaleToUnit non-finite.
// Such triangles are flagged as back-facing above, but are only marked invalid when bBackFaceCull is set - confirm callers handle this.
const float ScaleToUnit = SubpixelSamples / ( Tri.C0 + Tri.C1 + Tri.C2 );
// Correct for fill convention
// Top left rule for CCW
// The active path subtracts 0 or 1 from each constant. Assuming integer-valued edges, it matches the explicit comparisons in the disabled path.
#if 1
Tri.C0 -= saturate( Tri.Edge12.y + saturate( 1.0f - Tri.Edge12.x ) );
Tri.C1 -= saturate( Tri.Edge20.y + saturate( 1.0f - Tri.Edge20.x ) );
Tri.C2 -= saturate( Tri.Edge01.y + saturate( 1.0f - Tri.Edge01.x ) );
#else
Tri.C0 -= ( Tri.Edge12.y < 0 || ( Tri.Edge12.y == 0 && Tri.Edge12.x > 0 ) ) ? 0 : 1;
Tri.C1 -= ( Tri.Edge20.y < 0 || ( Tri.Edge20.y == 0 && Tri.Edge20.x > 0 ) ) ? 0 : 1;
Tri.C2 -= ( Tri.Edge01.y < 0 || ( Tri.Edge01.y == 0 && Tri.Edge01.x > 0 ) ) ? 0 : 1;
#endif
#if 0
// Step in pixel increments
// 8.16 fixed point
Tri.Edge01 *= SubpixelSamples;
Tri.Edge12 *= SubpixelSamples;
Tri.Edge20 *= SubpixelSamples;
#else
// Scale C0/C1/C2 down by SubpixelSamples instead of scaling Edge01/Edge12/Edge20 up. Lossless because SubpixelSamples is a power of two.
Tri.C0 *= (1.0f / SubpixelSamples);
Tri.C1 *= (1.0f / SubpixelSamples);
Tri.C2 *= (1.0f / SubpixelSamples);
#endif
// Per-pixel derivatives of the normalized edge functions: stepping one pixel in x subtracts Edge.y, stepping in y adds Edge.x.
Tri.Barycentrics_dx = float3( -Tri.Edge12.y, -Tri.Edge20.y, -Tri.Edge01.y ) * ScaleToUnit;
Tri.Barycentrics_dy = float3( Tri.Edge12.x, Tri.Edge20.x, Tri.Edge01.x ) * ScaleToUnit;
// Depth at vertex 0 plus the depth deltas to vertices 1 and 2, with the deltas pre-multiplied by ScaleToUnit.
Tri.DepthPlane.x = Verts[0].z;
Tri.DepthPlane.y = Verts[1].z - Verts[0].z;
Tri.DepthPlane.z = Verts[2].z - Verts[0].z;
Tri.DepthPlane.yz *= ScaleToUnit;
return Tri;
}
// Rasterizes a triangle by testing every pixel of its inclusive bounding rect
// [Tri.MinPixel, Tri.MaxPixel], row by row.
//
// Tri			Triangle state produced by SetupTriangle().
// WritePixel	Callable invoked as WritePixel( uint2 PixelPos, float3 C, Tri ) for each
//				pixel where all three edge-function values are >= 0.
//
// The first column of each row is tested before the inner loop starts so the inner loop
// only runs for rects that are wider than one pixel.
template< typename FWritePixel >
void RasterizeTri_Rect( FRasterTri Tri, FWritePixel WritePixel )
{
	// Edge-function values at the first pixel of the current row.
	float CY0 = Tri.C0;
	float CY1 = Tri.C1;
	float CY2 = Tri.C2;

	int y = Tri.MinPixel.y;
	while (true)
	{
		int x = Tri.MinPixel.x;
		if (min3(CY0, CY1, CY2) >= 0)
		{
			WritePixel( uint2(x,y), float3(CY0, CY1, CY2), Tri );
		}

		if (x < Tri.MaxPixel.x)
		{
			// Stepping one pixel in +x subtracts the y component of each edge.
			float CX0 = CY0 - Tri.Edge12.y;
			float CX1 = CY1 - Tri.Edge20.y;
			float CX2 = CY2 - Tri.Edge01.y;
			x++;

			while (true)
			{
				if (min3(CX0, CX1, CX2) >= 0)
				{
					// Pixel position is passed as uint2, matching the first-column call above
					// and the other RasterizeTri_* variants.
					WritePixel( uint2(x,y), float3(CX0, CX1, CX2), Tri );
				}

				// MaxPixel is inclusive, so stop after the last column has been tested.
				if (x >= Tri.MaxPixel.x)
					break;

				CX0 -= Tri.Edge12.y;
				CX1 -= Tri.Edge20.y;
				CX2 -= Tri.Edge01.y;
				x++;
			}
		}

		if (y >= Tri.MaxPixel.y)
			break;

		// Stepping one pixel in +y adds the x component of each edge.
		CY0 += Tri.Edge12.x;
		CY1 += Tri.Edge20.x;
		CY2 += Tri.Edge01.x;
		y++;
	}
}
// Same coverage as RasterizeTri_Rect, but walks the inclusive bounding rect with a single
// flat loop: advance along the row until MaxPixel.x, then drop to the start of the next row.
//
// Tri			Triangle state produced by SetupTriangle().
// WritePixel	Callable invoked as WritePixel( uint2 PixelPos, float3 C, Tri ) for each
//				pixel where all three edge-function values are >= 0.
template< typename FWritePixel >
void RasterizeTri_RectSingle( FRasterTri Tri, FWritePixel WritePixel )
{
	// Per-pixel increments of the three edge functions.
	const float3 StepX = float3( Tri.Edge12.y, Tri.Edge20.y, Tri.Edge01.y );
	const float3 StepY = float3( Tri.Edge12.x, Tri.Edge20.x, Tri.Edge01.x );

	// Edge-function values at the start of the current row and at the current pixel.
	float3 RowStart = float3( Tri.C0, Tri.C1, Tri.C2 );
	float3 Current = RowStart;

	int2 Pixel = Tri.MinPixel;

	while( true )
	{
		if( min3( Current.x, Current.y, Current.z ) >= 0 )
		{
			WritePixel( uint2( Pixel.x, Pixel.y ), Current, Tri );
		}

		const bool bMoreColumns = Pixel.x < Tri.MaxPixel.x;
		const bool bMoreRows = Pixel.y < Tri.MaxPixel.y;

		// Last pixel of the last row has been visited.
		if( !bMoreColumns && !bMoreRows )
		{
			break;
		}

		if( bMoreColumns )
		{
			// Next pixel on the same row.
			Current -= StepX;
			Pixel.x++;
		}
		else
		{
			// Wrap around to the first pixel of the next row.
			RowStart += StepY;
			Pixel.y++;

			Current = RowStart;
			Pixel.x = Tri.MinPixel.x;
		}
	}
}
// Rasterizes a triangle one row at a time, solving each edge function for the x range
// it allows so that only the covered span of the row is visited.
//
// Tri			Triangle state produced by SetupTriangle().
// WritePixel	Callable invoked as WritePixel( uint2 PixelPos, float3 C, Tri ) for each
//				pixel where all three edge-function values are >= 0.
template< typename FWritePixel >
void RasterizeTri_Scanline( FRasterTri Tri, FWritePixel WritePixel )
{
	// Per-pixel increments of the three edge functions.
	const float3 StepX = { Tri.Edge12.y, Tri.Edge20.y, Tri.Edge01.y };
	const float3 StepY = { Tri.Edge12.x, Tri.Edge20.x, Tri.Edge01.x };

	// An edge whose x step is negative bounds the span from the left (opening edge),
	// otherwise it bounds it from the right (closing edge).
	const bool3 bOpening = StepX < 0;
	// Edges with a zero x step get a large reciprocal instead of a division by zero.
	const float3 InvStepX = select( StepX == 0, 1e8, rcp( StepX ) );

	const int RectWidth = Tri.MaxPixel.x - Tri.MinPixel.x;

	// Edge-function values at the first pixel of the current row.
	float3 RowC = float3( Tri.C0, Tri.C1, Tri.C2 );

	int y = Tri.MinPixel.y;
	while( true )
	{
		// For an offset dx from MinPixel.x the edge value is RowC - StepX * dx, which must stay >= 0:
		//   StepX > 0  =>  dx <= RowC / StepX   (closing edge, upper bound)
		//   StepX < 0  =>  dx >= RowC / StepX   (opening edge, lower bound)
		// No longer fixed point
		const float3 CrossX = RowC * InvStepX;

		const float3 MinX = select( bOpening, CrossX, 0.0 );
		const float3 MaxX = select( bOpening, RectWidth, CrossX );

		float x0 = ceil( max3( MinX.x, MinX.y, MinX.z ) );
		float x1 = min3( MaxX.x, MaxX.y, MaxX.z );

		// Edge-function values at the first pixel of the span.
		float3 C = RowC - x0 * StepX;

		x0 += Tri.MinPixel.x;
		x1 += Tri.MinPixel.x;

		// NOTE: x0 can end up greater than x1. The loop condition then skips the row entirely,
		// which is required to avoid artifacts, particularly in VSM near page edges.
		for( float x = x0; x <= x1; x++ )
		{
			if( min3( C.x, C.y, C.z ) >= 0 )
				WritePixel( uint2(x, y), C, Tri );

			C -= StepX;
		}

		if( y >= Tri.MaxPixel.y )
			break;

		RowC += StepY;
		y++;
	}
}
// Picks a rasterization loop for the triangle: the scanline loop when NANITE_PIXEL_PROGRAMMABLE
// is set or when any active lane in the wave has a bounding rect more than 4 pixels wide,
// otherwise the bounding-rect loop.
//
// Tri			Triangle state produced by SetupTriangle().
// WritePixel	Callable forwarded unchanged to the selected rasterizer.
template< typename FWritePixel >
void RasterizeTri_Adaptive( FRasterTri Tri, FWritePixel WritePixel )
{
	const int RectWidth = Tri.MaxPixel.x - Tri.MinPixel.x;

	// The decision is made per wave rather than per lane, so all lanes take the same path.
	const bool bUseScanline = NANITE_PIXEL_PROGRAMMABLE || WaveActiveAnyTrue( RectWidth > 4 );

	if( !bUseScanline )
	{
		RasterizeTri_Rect( Tri, WritePixel );
	}
	else
	{
		RasterizeTri_Scanline( Tri, WritePixel );
	}
}