// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once

// Per-triangle state produced by SetupTriangle and consumed by the RasterizeTri_* functions.
struct FRasterTri
{
	// Inclusive pixel bounding rect, after scissoring and clamping to at most 64 pixels per axis.
	int2	MinPixel;
	int2	MaxPixel;

	// Vertex position differences (Vert0 - Vert1, Vert1 - Vert2, Vert2 - Vert0).
	// Negated when the triangle is back facing and back face culling is disabled.
	float2	Edge01;
	float2	Edge12;
	float2	Edge20;

	// Edge function values at the center of MinPixel, adjusted for the fill convention
	// and scaled by 1 / SubpixelSamples. C0 belongs to Edge12, C1 to Edge20, C2 to Edge01.
	float	C0;
	float	C1;
	float	C2;

	// x = Verts[0].z, yz = z deltas of Verts[1] and Verts[2] relative to Verts[0], scaled by ScaleToUnit.
	float3	DepthPlane;
	// The w components of the three input vertices.
	// NOTE(review): the name suggests callers store 1/w there - confirm against callers.
	float3	InvW;

	// Edge y / x components scaled by ScaleToUnit (see the end of SetupTriangle).
	float3	Barycentrics_dx;
	float3	Barycentrics_dy;

	// False when the triangle was culled (back face culling or empty pixel rect).
	bool	bIsValid;
	// True when DetXY >= 0 in SetupTriangle.
	bool	bBackFace;
};

// Builds the FRasterTri for one triangle.
//
// SubpixelSamples	Number of subpixel units per pixel. The code comments below state it is a power of two.
// bBackFaceCull	When true, back facing triangles are marked invalid. When false, their edges are
//					negated so the rest of the setup sees a consistent winding.
// ScissorRect		xy = inclusive min pixel, zw = exclusive max pixel (zw - 1 is used as the inclusive max).
// Verts			xy in subpixel units, z is used for the depth plane, w is copied to InvW.
//
// All fields are filled in even when bIsValid ends up false; there is no early out.
// NOTE(review): values are floats here; the "fixed point" annotations presumably describe
// the integer ranges those floats are expected to hold - confirm.
template< uint SubpixelSamples, bool bBackFaceCull >
FRasterTri SetupTriangle( int4 ScissorRect, float4 Verts[3] )
{
	FRasterTri Tri;
	Tri.bIsValid = true;

	Tri.InvW = float3( Verts[0].w, Verts[1].w, Verts[2].w );

	// 16.8 fixed point
	float2 Vert0 = Verts[0].xy;
	float2 Vert1 = Verts[1].xy;
	float2 Vert2 = Verts[2].xy;

	// 4.8 fixed point
	Tri.Edge01 = Vert0 - Vert1;
	Tri.Edge12 = Vert1 - Vert2;
	Tri.Edge20 = Vert2 - Vert0;

	// Zero area triangles (DetXY == 0) are classified as back facing.
	float DetXY = Tri.Edge01.y * Tri.Edge20.x - Tri.Edge01.x * Tri.Edge20.y;
	Tri.bBackFace = (DetXY >= 0.0f);

	if( bBackFaceCull )
		Tri.bIsValid = !Tri.bBackFace;

	BRANCH
	if( !bBackFaceCull && Tri.bBackFace )
	{
		// Swap winding order
		Tri.Edge01 *= -1.0f;
		Tri.Edge12 *= -1.0f;
		Tri.Edge20 *= -1.0f;
	}

	// Bounding rect
	const float2 MinSubpixel = min3( Vert0, Vert1, Vert2 );
	const float2 MaxSubpixel = max3( Vert0, Vert1, Vert2 );

	// Round to nearest pixel
	Tri.MinPixel = (int2)floor( ( MinSubpixel + (SubpixelSamples / 2) - 1 ) * (1.0 / SubpixelSamples) );
	Tri.MaxPixel = (int2)floor( ( MaxSubpixel - (SubpixelSamples / 2) - 1 ) * (1.0 / SubpixelSamples) );	// inclusive!

	// Scissor
	Tri.MinPixel = max( Tri.MinPixel, ScissorRect.xy );
	Tri.MaxPixel = min( Tri.MaxPixel, ScissorRect.zw - 1 );

	// Limit the rasterizer bounds to a sensible max.
	Tri.MaxPixel = min( Tri.MaxPixel, Tri.MinPixel + 63 );

	// Cull when no pixels covered
	if( any( Tri.MinPixel > Tri.MaxPixel ) )
		Tri.bIsValid = false;

	// Rebase off MinPixel with half pixel offset
	// 4.8 fixed point
	// Max triangle size should only be 7x7 pixels. Not sure why this works for larger triangles.
	const float2 BaseSubpixel = (float2)Tri.MinPixel * SubpixelSamples + (SubpixelSamples / 2);
	Vert0 -= BaseSubpixel;
	Vert1 -= BaseSubpixel;
	Vert2 -= BaseSubpixel;

	// Half-edge constants
	// 8.16 fixed point
	Tri.C0 = Tri.Edge12.y * Vert1.x - Tri.Edge12.x * Vert1.y;
	Tri.C1 = Tri.Edge20.y * Vert2.x - Tri.Edge20.x * Vert2.y;
	Tri.C2 = Tri.Edge01.y * Vert0.x - Tri.Edge01.x * Vert0.y;

	// Sum C before nudging for fill convention. Afterwards it could be zero.
	const float ScaleToUnit = SubpixelSamples / ( Tri.C0 + Tri.C1 + Tri.C2 );

	// Correct for fill convention
	// Top left rule for CCW
#if 1
	// Branchless form of the #else variant below.
	// NOTE(review): the two forms match when the edge components are integer valued - confirm that holds for all callers.
	Tri.C0 -= saturate( Tri.Edge12.y + saturate( 1.0f - Tri.Edge12.x ) );
	Tri.C1 -= saturate( Tri.Edge20.y + saturate( 1.0f - Tri.Edge20.x ) );
	Tri.C2 -= saturate( Tri.Edge01.y + saturate( 1.0f - Tri.Edge01.x ) );
#else
	Tri.C0 -= ( Tri.Edge12.y < 0 || ( Tri.Edge12.y == 0 && Tri.Edge12.x > 0 ) ) ? 0 : 1;
	Tri.C1 -= ( Tri.Edge20.y < 0 || ( Tri.Edge20.y == 0 && Tri.Edge20.x > 0 ) ) ? 0 : 1;
	Tri.C2 -= ( Tri.Edge01.y < 0 || ( Tri.Edge01.y == 0 && Tri.Edge01.x > 0 ) ) ? 0 : 1;
#endif

#if 0
	// Step in pixel increments
	// 8.16 fixed point
	Tri.Edge01 *= SubpixelSamples;
	Tri.Edge12 *= SubpixelSamples;
	Tri.Edge20 *= SubpixelSamples;
#else
	// Scale C0/C1/C2 down by SubpixelSamples instead of scaling Edge01/Edge12/Edge20 up. Lossless because SubpixelSamples is a power of two.
	Tri.C0 *= (1.0f / SubpixelSamples);
	Tri.C1 *= (1.0f / SubpixelSamples);
	Tri.C2 *= (1.0f / SubpixelSamples);
#endif

	// Per pixel steps of the edge functions, rescaled by ScaleToUnit.
	Tri.Barycentrics_dx = float3( -Tri.Edge12.y, -Tri.Edge20.y, -Tri.Edge01.y ) * ScaleToUnit;
	Tri.Barycentrics_dy = float3(  Tri.Edge12.x,  Tri.Edge20.x,  Tri.Edge01.x ) * ScaleToUnit;

	Tri.DepthPlane.x = Verts[0].z;
	Tri.DepthPlane.y = Verts[1].z - Verts[0].z;
	Tri.DepthPlane.z = Verts[2].z - Verts[0].z;
	Tri.DepthPlane.yz *= ScaleToUnit;

	return Tri;
}

// Visits every pixel of the inclusive rect [MinPixel, MaxPixel] row by row, stepping the three
// edge functions incrementally, and calls WritePixel( Pixel, float3( C0, C1, C2 ), Tri ) for each
// pixel where all three are >= 0.
//
// Tri.bIsValid is not checked here, and the pixel at MinPixel is tested even if the rect is empty,
// so callers are presumably expected to skip invalid triangles themselves.
template< typename FWritePixel >
void RasterizeTri_Rect( FRasterTri Tri, FWritePixel WritePixel )
{
	float CY0 = Tri.C0;
	float CY1 = Tri.C1;
	float CY2 = Tri.C2;

	int y = Tri.MinPixel.y;
	while( true )
	{
		// First pixel of the row is tested with the row start values directly.
		int x = Tri.MinPixel.x;
		if( min3( CY0, CY1, CY2 ) >= 0 )
		{
			WritePixel( uint2(x,y), float3(CY0, CY1, CY2), Tri );
		}

		if( x < Tri.MaxPixel.x )
		{
			// Remaining pixels of the row: step one pixel in x per iteration.
			float CX0 = CY0 - Tri.Edge12.y;
			float CX1 = CY1 - Tri.Edge20.y;
			float CX2 = CY2 - Tri.Edge01.y;
			x++;

			while( true )
			{
				if( min3( CX0, CX1, CX2 ) >= 0 )
				{
					WritePixel( uint2(x,y), float3(CX0, CX1, CX2), Tri );
				}

				if( x >= Tri.MaxPixel.x )
					break;

				CX0 -= Tri.Edge12.y;
				CX1 -= Tri.Edge20.y;
				CX2 -= Tri.Edge01.y;
				x++;
			}
		}

		if( y >= Tri.MaxPixel.y )
			break;

		// Step one pixel in y.
		CY0 += Tri.Edge12.x;
		CY1 += Tri.Edge20.x;
		CY2 += Tri.Edge01.x;
		y++;
	}
}

// Same traversal and WritePixel contract as RasterizeTri_Rect, expressed as a single loop
// that either steps in x, wraps to the start of the next row, or terminates.
template< typename FWritePixel >
void RasterizeTri_RectSingle( FRasterTri Tri, FWritePixel WritePixel )
{
	// Edge function values at the start of the current row.
	float CY0 = Tri.C0;
	float CY1 = Tri.C1;
	float CY2 = Tri.C2;

	// Edge function values at the current pixel.
	float CX0 = CY0;
	float CX1 = CY1;
	float CX2 = CY2;

	int x = Tri.MinPixel.x;
	int y = Tri.MinPixel.y;

	while( true )
	{
		if( min3( CX0, CX1, CX2 ) >= 0 )
		{
			WritePixel( uint2(x,y), float3(CX0, CX1, CX2), Tri );
		}

		if( x < Tri.MaxPixel.x )
		{
			// Next pixel in this row.
			CX0 -= Tri.Edge12.y;
			CX1 -= Tri.Edge20.y;
			CX2 -= Tri.Edge01.y;
			x++;
		}
		else if( y < Tri.MaxPixel.y )
		{
			// Next row, restart at the left edge of the rect.
			CY0 += Tri.Edge12.x;
			CY1 += Tri.Edge20.x;
			CY2 += Tri.Edge01.x;
			y++;

			CX0 = CY0;
			CX1 = CY1;
			CX2 = CY2;
			x = Tri.MinPixel.x;
		}
		else
		{
			break;
		}
	}
}

// Row by row rasterization that first solves, per row, for the x span in which the edge functions
// are non-negative and then only walks that span. Each pixel in the span is still tested with
// min3() >= 0 before WritePixel( Pixel, float3( C0, C1, C2 ), Tri ) is called.
//
// Tri.bIsValid is not checked here.
template< typename FWritePixel >
void RasterizeTri_Scanline( FRasterTri Tri, FWritePixel WritePixel )
{
	float CY0 = Tri.C0;
	float CY1 = Tri.C1;
	float CY2 = Tri.C2;

	// Per edge x step. Edges with a negative step bound the span from the left ("opening"),
	// the others bound it from the right ("closing").
	float3 Edge012 = { Tri.Edge12.y, Tri.Edge20.y, Tri.Edge01.y };
	bool3 bOpenEdge = Edge012 < 0;
	// A zero step uses a large stand-in reciprocal instead of rcp( 0 ).
	float3 InvEdge012 = select( Edge012 == 0, 1e8, rcp( Edge012 ) );

	int y = Tri.MinPixel.y;
	while( true )
	{
		//float CX0 = CY0 - Edge12.y * (x - MinPixel.x);

		// Edge12.y * (x - MinPixel.x) <= CY0;

		/*
		if( Edge12.y > 0 )
			x <= CY0 / Edge12.y + MinPixel.x;	// Closing edge
		else
			x >= CY0 / Edge12.y + MinPixel.x;	// Opening edge
		*/

		// No longer fixed point
		float3 CrossX = float3( CY0, CY1, CY2 ) * InvEdge012;

		// Span bounds relative to MinPixel.x: opening edges raise the minimum (floor of 0),
		// closing edges lower the maximum; opening edges contribute the rect width to the maximum.
		float3 MinX = select( bOpenEdge, CrossX, 0.0 );
		float3 MaxX = select( bOpenEdge, Tri.MaxPixel.x - Tri.MinPixel.x, CrossX );

		float x0 = ceil( max3( MinX.x, MinX.y, MinX.z ) );
		float x1 =       min3( MaxX.x, MaxX.y, MaxX.z );

		// Edge function values at the first pixel of the span.
		float CX0 = CY0 - x0 * Tri.Edge12.y;
		float CX1 = CY1 - x0 * Tri.Edge20.y;
		float CX2 = CY2 - x0 * Tri.Edge01.y;

		// Back to absolute pixel coordinates.
		x0 += Tri.MinPixel.x;
		x1 += Tri.MinPixel.x;

		// NOTE: In some cases x0 > x1 and we need to avoid writing pixels in those situations
		// or else artifacts can appear, particularly in VSM near page edges.
		for( float x = x0; x <= x1; x++ )
		{
			if( min3( CX0, CX1, CX2 ) >= 0 )
				WritePixel( uint2(x, y), float3(CX0, CX1, CX2), Tri );

			CX0 -= Tri.Edge12.y;
			CX1 -= Tri.Edge20.y;
			CX2 -= Tri.Edge01.y;
		}

		if( y >= Tri.MaxPixel.y )
			break;

		// Step one pixel in y.
		CY0 += Tri.Edge12.x;
		CY1 += Tri.Edge20.x;
		CY2 += Tri.Edge01.x;
		y++;
	}
}

// Picks a rasterization strategy for the whole wave: the scanline path is used when
// NANITE_PIXEL_PROGRAMMABLE is set or when any active lane has a rect spanning more than
// 4 pixel steps in x; otherwise the rect path is used.
template< typename FWritePixel >
void RasterizeTri_Adaptive( FRasterTri Tri, FWritePixel WritePixel )
{
	bool bScanline = NANITE_PIXEL_PROGRAMMABLE || WaveActiveAnyTrue( Tri.MaxPixel.x - Tri.MinPixel.x > 4 );

	if( bScanline )
		RasterizeTri_Scanline( Tri, WritePixel );
	else
		RasterizeTri_Rect( Tri, WritePixel );
}