300 lines
7.1 KiB
HLSL
300 lines
7.1 KiB
HLSL
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#pragma once
|
|
|
|
// Per-triangle rasterization state produced by SetupTriangle() and consumed by the RasterizeTri_* functions.
struct FRasterTri
{
	// Pixel bounding rectangle of the triangle after scissoring. Both corners are inclusive.
	int2	MinPixel, MaxPixel;

	// Edge vectors between the vertex xy positions (Vert0 - Vert1, Vert1 - Vert2, Vert2 - Vert0).
	// SetupTriangle negates them for back-facing triangles when back face culling is disabled.
	float2	Edge01, Edge12, Edge20;

	// Edge function values at MinPixel. The rasterizers start from these and step them per pixel.
	float	C0, C1, C2;

	// x = z of vertex 0, yz = z deltas to vertices 1 and 2 scaled by the same factor as the barycentric gradients.
	float3	DepthPlane;

	// w components of the three input vertices, copied verbatim.
	// NOTE(review): the name suggests the caller already stores 1/w there - confirm.
	float3	InvW;

	// Scaled edge components filled in by SetupTriangle; presumably the per-pixel barycentric gradients in x and y.
	float3	Barycentrics_dx, Barycentrics_dy;

	// bIsValid is false when the triangle was culled or covers no pixels.
	// bBackFace is the winding test result from SetupTriangle.
	bool	bIsValid, bBackFace;
};
|
|
|
|
// Builds the rasterization state for one triangle.
//
// SubpixelSamples	Number of subpixel steps per pixel. Vertex xy are expressed in these units.
// bBackFaceCull	When true, back-facing triangles are flagged invalid. When false they are kept and their winding is flipped.
// ScissorRect		xy = inclusive minimum pixel, zw = exclusive maximum pixel (1 is subtracted before clamping).
// Verts			xy = subpixel position, z = depth, w = value copied into FRasterTri::InvW.
//
// Returns the filled FRasterTri. Callers must check bIsValid before rasterizing.
template< uint SubpixelSamples, bool bBackFaceCull >
FRasterTri SetupTriangle( int4 ScissorRect, float4 Verts[3] )
{
	FRasterTri Tri;
	Tri.InvW = float3( Verts[0].w, Verts[1].w, Verts[2].w );

	// 16.8 fixed point
	float2 Pos0 = Verts[0].xy;
	float2 Pos1 = Verts[1].xy;
	float2 Pos2 = Verts[2].xy;

	// 4.8 fixed point
	Tri.Edge01 = Pos0 - Pos1;
	Tri.Edge12 = Pos1 - Pos2;
	Tri.Edge20 = Pos2 - Pos0;

	// Sign of the 2D determinant gives the winding. Zero area counts as back facing.
	const float WindingDet = Tri.Edge01.y * Tri.Edge20.x - Tri.Edge01.x * Tri.Edge20.y;
	Tri.bBackFace = (WindingDet >= 0.0f);
	Tri.bIsValid = bBackFaceCull ? !Tri.bBackFace : true;

	BRANCH
	if( !bBackFaceCull && Tri.bBackFace )
	{
		// Swap winding order so the edge functions below keep the same sign convention.
		Tri.Edge01 = -Tri.Edge01;
		Tri.Edge12 = -Tri.Edge12;
		Tri.Edge20 = -Tri.Edge20;
	}

	// Bounding rect in subpixels
	const float2 BoundsMinSubpixel = min3( Pos0, Pos1, Pos2 );
	const float2 BoundsMaxSubpixel = max3( Pos0, Pos1, Pos2 );

	// Round to nearest pixel. The max corner is inclusive!
	int2 BoundsMinPixel = (int2)floor( ( BoundsMinSubpixel + (SubpixelSamples / 2) - 1 ) * (1.0 / SubpixelSamples) );
	int2 BoundsMaxPixel = (int2)floor( ( BoundsMaxSubpixel - (SubpixelSamples / 2) - 1 ) * (1.0 / SubpixelSamples) );

	// Scissor
	BoundsMinPixel = max( BoundsMinPixel, ScissorRect.xy );
	BoundsMaxPixel = min( BoundsMaxPixel, ScissorRect.zw - 1 );

	// Limit the rasterizer bounds to a sensible max.
	BoundsMaxPixel = min( BoundsMaxPixel, BoundsMinPixel + 63 );

	Tri.MinPixel = BoundsMinPixel;
	Tri.MaxPixel = BoundsMaxPixel;

	// Cull when no pixels covered
	if( !all( BoundsMinPixel <= BoundsMaxPixel ) )
	{
		Tri.bIsValid = false;
	}

	// Rebase off MinPixel with half pixel offset
	// 4.8 fixed point
	// Max triangle size should only be 7x7 pixels. Not sure why this works for larger triangles.
	const float2 OriginSubpixel = (float2)Tri.MinPixel * SubpixelSamples + (SubpixelSamples / 2);
	Pos0 -= OriginSubpixel;
	Pos1 -= OriginSubpixel;
	Pos2 -= OriginSubpixel;

	// Half-edge constants
	// 8.16 fixed point
	Tri.C0 = Tri.Edge12.y * Pos1.x - Tri.Edge12.x * Pos1.y;
	Tri.C1 = Tri.Edge20.y * Pos2.x - Tri.Edge20.x * Pos2.y;
	Tri.C2 = Tri.Edge01.y * Pos0.x - Tri.Edge01.x * Pos0.y;

	// Sum C before nudging for fill convention. Afterwards it could be zero.
	const float ScaleToUnit = SubpixelSamples / ( Tri.C0 + Tri.C1 + Tri.C2 );

	// Correct for fill convention
	// Top left rule for CCW
	// The active branch yields the same 0/1 nudge as the explicit conditions in the #else branch
	// as long as the edge components are whole numbers (assumed from the fixed point inputs - confirm).
#if 1
	Tri.C0 -= saturate( Tri.Edge12.y + saturate( 1.0f - Tri.Edge12.x ) );
	Tri.C1 -= saturate( Tri.Edge20.y + saturate( 1.0f - Tri.Edge20.x ) );
	Tri.C2 -= saturate( Tri.Edge01.y + saturate( 1.0f - Tri.Edge01.x ) );
#else
	Tri.C0 -= ( Tri.Edge12.y < 0 || ( Tri.Edge12.y == 0 && Tri.Edge12.x > 0 ) ) ? 0 : 1;
	Tri.C1 -= ( Tri.Edge20.y < 0 || ( Tri.Edge20.y == 0 && Tri.Edge20.x > 0 ) ) ? 0 : 1;
	Tri.C2 -= ( Tri.Edge01.y < 0 || ( Tri.Edge01.y == 0 && Tri.Edge01.x > 0 ) ) ? 0 : 1;
#endif

#if 0
	// Step in pixel increments
	// 8.16 fixed point
	Tri.Edge01 *= SubpixelSamples;
	Tri.Edge12 *= SubpixelSamples;
	Tri.Edge20 *= SubpixelSamples;
#else
	// Scale C0/C1/C2 down by SubpixelSamples instead of scaling Edge01/Edge12/Edge20 up. Lossless because SubpixelSamples is a power of two.
	Tri.C0 *= (1.0f / SubpixelSamples);
	Tri.C1 *= (1.0f / SubpixelSamples);
	Tri.C2 *= (1.0f / SubpixelSamples);
#endif

	// Per-pixel steps of the edge functions, normalized by the pre-nudge sum of C0..C2.
	Tri.Barycentrics_dx = float3( -Tri.Edge12.y, -Tri.Edge20.y, -Tri.Edge01.y ) * ScaleToUnit;
	Tri.Barycentrics_dy = float3(  Tri.Edge12.x,  Tri.Edge20.x,  Tri.Edge01.x ) * ScaleToUnit;

	// Depth at vertex 0 plus the deltas to vertices 1 and 2, with the deltas carrying the same normalization.
	Tri.DepthPlane = float3(
		Verts[0].z,
		( Verts[1].z - Verts[0].z ) * ScaleToUnit,
		( Verts[2].z - Verts[0].z ) * ScaleToUnit );

	return Tri;
}
|
|
|
|
// Rasterizes a triangle by visiting every pixel of its inclusive bounding rectangle [MinPixel, MaxPixel].
//
// The three edge functions start at Tri.C0/C1/C2 and are stepped incrementally:
// one pixel in +x subtracts the edge's y component, one pixel in +y adds the edge's x component.
// A pixel is covered when all three edge function values are >= 0.
//
// Tri			Triangle state from SetupTriangle().
// WritePixel	Callable invoked as WritePixel( uint2 PixelPos, float3 EdgeValues, Tri ) for each covered pixel.
template< typename FWritePixel >
void RasterizeTri_Rect( FRasterTri Tri, FWritePixel WritePixel )
{
	// Edge function values at the first pixel of the current row.
	float CY0 = Tri.C0;
	float CY1 = Tri.C1;
	float CY2 = Tri.C2;

	int y = Tri.MinPixel.y;
	while (true)
	{
		// The first pixel of the row is handled outside the inner loop.
		int x = Tri.MinPixel.x;
		if (min3(CY0, CY1, CY2) >= 0)
		{
			WritePixel( uint2(x,y), float3(CY0, CY1, CY2), Tri );
		}

		if (x < Tri.MaxPixel.x)
		{
			float CX0 = CY0 - Tri.Edge12.y;
			float CX1 = CY1 - Tri.Edge20.y;
			float CX2 = CY2 - Tri.Edge01.y;
			x++;

			while (true)
			{
				if (min3(CX0, CX1, CX2) >= 0)
				{
					// Pixel position is passed as uint2, matching the first-column call above and the other rasterizers.
					WritePixel( uint2(x,y), float3(CX0, CX1, CX2), Tri );
				}

				// MaxPixel is inclusive, so stop only after it has been tested.
				if (x >= Tri.MaxPixel.x)
					break;

				CX0 -= Tri.Edge12.y;
				CX1 -= Tri.Edge20.y;
				CX2 -= Tri.Edge01.y;
				x++;
			}
		}

		if (y >= Tri.MaxPixel.y)
			break;

		CY0 += Tri.Edge12.x;
		CY1 += Tri.Edge20.x;
		CY2 += Tri.Edge01.x;
		y++;
	}
}
|
|
|
|
// Same traversal as RasterizeTri_Rect, but expressed as a single loop that advances either one pixel
// to the right or, at the end of a row, to the first pixel of the next row.
//
// Tri			Triangle state from SetupTriangle().
// WritePixel	Callable invoked as WritePixel( uint2 PixelPos, float3 EdgeValues, Tri ) for each covered pixel.
template< typename FWritePixel >
void RasterizeTri_RectSingle( FRasterTri Tri, FWritePixel WritePixel )
{
	// Per-pixel steps of the three edge functions along x and y.
	const float3 StepX = float3( Tri.Edge12.y, Tri.Edge20.y, Tri.Edge01.y );
	const float3 StepY = float3( Tri.Edge12.x, Tri.Edge20.x, Tri.Edge01.x );

	// Edge function values at the start of the current row and at the current pixel.
	float3 RowStart = float3( Tri.C0, Tri.C1, Tri.C2 );
	float3 Current = RowStart;

	int2 Pixel = Tri.MinPixel;

	while( true )
	{
		// Covered when all three edge functions are non-negative.
		if( min3( Current.x, Current.y, Current.z ) >= 0 )
		{
			WritePixel( uint2( Pixel.x, Pixel.y ), Current, Tri );
		}

		if( Pixel.x < Tri.MaxPixel.x )
		{
			// Next pixel in the same row.
			Current -= StepX;
			Pixel.x++;
		}
		else if( Pixel.y < Tri.MaxPixel.y )
		{
			// Row finished: restart from the row origin, one row down.
			RowStart += StepY;
			Pixel.y++;

			Current = RowStart;
			Pixel.x = Tri.MinPixel.x;
		}
		else
		{
			// Bottom right (inclusive) corner has been tested.
			break;
		}
	}
}
|
|
|
|
// Rasterizes a triangle row by row, solving for the covered x span of each row instead of testing
// every pixel of the bounding rectangle.
//
// Tri			Triangle state from SetupTriangle().
// WritePixel	Callable invoked as WritePixel( uint2 PixelPos, float3 EdgeValues, Tri ) for each covered pixel.
template< typename FWritePixel >
void RasterizeTri_Scanline( FRasterTri Tri, FWritePixel WritePixel )
{
	// Edge function values at the first pixel of the current row.
	float3 RowC = float3( Tri.C0, Tri.C1, Tri.C2 );

	// Per-pixel x step of each edge function and its reciprocal.
	// Edges with a zero step get a large stand-in reciprocal instead of a division by zero.
	const float3 EdgeStepX = float3( Tri.Edge12.y, Tri.Edge20.y, Tri.Edge01.y );
	const bool3 bOpening = EdgeStepX < 0;
	const float3 InvEdgeStepX = select( EdgeStepX == 0, 1e8, rcp( EdgeStepX ) );

	// Vertical step of each edge function.
	const float3 EdgeStepY = float3( Tri.Edge12.x, Tri.Edge20.x, Tri.Edge01.x );

	int y = Tri.MinPixel.y;
	while( true )
	{
		// With dx = x - MinPixel.x, each edge function along the row is C - Step * dx, and coverage needs it >= 0:
		//   Step * dx <= C
		//   Step > 0:  dx <= C / Step	(closing edge, bounds the span from the right)
		//   Step < 0:  dx >= C / Step	(opening edge, bounds the span from the left)

		// No longer fixed point
		const float3 Crossing = RowC * InvEdgeStepX;

		// Opening edges contribute a left bound, the others a right bound.
		// The defaults are the bounding rectangle's own extents.
		const float3 LeftBound = select( bOpening, Crossing, 0.0 );
		const float3 RightBound = select( bOpening, Tri.MaxPixel.x - Tri.MinPixel.x, Crossing );

		float SpanBegin = ceil( max3( LeftBound.x, LeftBound.y, LeftBound.z ) );
		float SpanEnd = min3( RightBound.x, RightBound.y, RightBound.z );

		// Edge function values at the first pixel of the span.
		float3 PixelC = RowC - SpanBegin * EdgeStepX;

		// Back to absolute pixel coordinates.
		SpanBegin += Tri.MinPixel.x;
		SpanEnd += Tri.MinPixel.x;

		// NOTE: In some cases SpanBegin > SpanEnd and we need to avoid writing pixels in those situations
		// or else artifacts can appear, particularly in VSM near page edges.
		for( float x = SpanBegin; x <= SpanEnd; x++ )
		{
			if( min3( PixelC.x, PixelC.y, PixelC.z ) >= 0 )
			{
				WritePixel( uint2( x, y ), PixelC, Tri );
			}

			PixelC -= EdgeStepX;
		}

		// MaxPixel.y is inclusive, so stop only after that row has been processed.
		if( y >= Tri.MaxPixel.y )
		{
			break;
		}

		RowC += EdgeStepY;
		y++;
	}
}
|
|
|
|
// Picks a rasterization strategy for the whole wave.
//
// The scanline rasterizer is used when NANITE_PIXEL_PROGRAMMABLE is set, or when any active lane
// in the wave has a triangle whose bounding rectangle spans more than 4 pixel steps in x.
// Otherwise every lane uses the plain bounding rectangle rasterizer.
//
// Tri			Triangle state from SetupTriangle().
// WritePixel	Callable forwarded unchanged to the selected rasterizer.
template< typename FWritePixel >
void RasterizeTri_Adaptive( FRasterTri Tri, FWritePixel WritePixel )
{
	if( NANITE_PIXEL_PROGRAMMABLE || WaveActiveAnyTrue( Tri.MaxPixel.x - Tri.MinPixel.x > 4 ) )
	{
		RasterizeTri_Scanline( Tri, WritePixel );
	}
	else
	{
		RasterizeTri_Rect( Tri, WritePixel );
	}
}