318 lines
10 KiB
HLSL
318 lines
10 KiB
HLSL
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#pragma once
|
|
|
|
#include "Common.ush"
|
|
#include "DeferredShadingCommon.ush"
|
|
#include "BRDF.ush"
|
|
#include "ScreenSpaceReflectionTileCommons.ush"
|
|
|
|
#include "Substrate/Substrate.ush"
|
|
#include "Substrate/SubstrateEvaluation.ush"
|
|
|
|
// Tile classification of the pixels that require SSR.
// The code guarded by TILE_CATERGORISATION_SHADER is compiled by default; a file that
// defines the macro to 0 before this point skips it.
// NOTE(review): the spelling is part of the macro name, any external definition has to match it.
#if !defined(TILE_CATERGORISATION_SHADER)
	#define TILE_CATERGORISATION_SHADER 1
#endif
|
|
|
|
#if TILE_CATERGORISATION_SHADER
|
|
|
|
#if USE_SSR_PRE_PASS_STENCIL
// Depth/stencil texture; DoesPixelContainSSR() tests bit 0 of its stencil component.
Texture2D<uint2> SSRDepthStencilTexture;
#endif

// Pixels whose roughness is above this value are rejected by DoesPixelContainSSR().
float MaxRoughness;
// Pixels whose specular is below this value are rejected by DoesPixelContainSSR().
float MinSpecular;
// When zero, two sided foliage pixels are rejected by DoesPixelContainSSR().
int bEnableTwoSidedFoliage;

// Tile grid resolution; its x component is the row pitch used to linearise tile coordinates.
int2 TiledViewRes;
// Tile mask written by SSRTileCategorisationMarkCS: one bit per tile, 32 tiles per element.
RWStructuredBuffer<uint> TileMaskBufferOut;

#if COMPILER_SUPPORTS_WAVE_VOTE
// Incremented once by every wave of the group in which at least one lane found an SSR pixel.
groupshared uint bAnySSRPixels;
#else
// Per-thread result, merged with a reduction in group shared memory.
groupshared bool ContainsSSR[SSR_TILE_SIZE_XY * SSR_TILE_SIZE_XY];
#endif
|
|
|
|
/**
 * Tells whether the pixel at PixelPos qualifies for screen space reflections.
 * Depending on the permutation the decision comes from bit 0 of the stencil value,
 * from the Substrate material data, or from the legacy GBuffer.
 *
 * @param PixelPos	Integer pixel coordinate used to address the stencil / material data.
 * @return			true when the pixel passes the roughness, specular and material type filters.
 */
bool DoesPixelContainSSR(uint2 PixelPos)
{
#if USE_SSR_PRE_PASS_STENCIL

	// Only bit 0 of the stencil value is inspected.
	const uint StencilValue = SSRDepthStencilTexture.Load(uint3(PixelPos, 0)) STENCIL_COMPONENT_SWIZZLE;
	return (StencilValue & 1U) != 0U;

#else // !USE_SSR_PRE_PASS_STENCIL

#if SUBTRATE_GBUFFER_FORMAT==1

	FSubstrateAddressing SubstrateAddressing = GetSubstratePixelDataByteOffset(PixelPos, uint2(View.BufferSizeAndInvSize.xy), Substrate.MaxBytesPerPixel);
	FSubstratePixelHeader SubstratePixelHeader = UnpackSubstrateHeaderIn(Substrate.MaterialTextureArray, SubstrateAddressing, Substrate.TopLayerTexture);
	const FSubstrateTopLayerData TopLayerData = SubstrateUnpackTopLayerData(SubstratePixelHeader.PackedTopLayerData);

	const bool bWithinMaxRoughness = TopLayerData.Roughness <= MaxRoughness;

	// Default answer, used as is for pixels with more than one closure: any closure at all plus a valid top layer roughness.
	bool bIsSSRPixel = bWithinMaxRoughness && SubstratePixelHeader.ClosureCount > 0;

	// Only non complex materials will be interpreted for exclusion for now when it comes to Specular values or legacy shading models.
	BRANCH
	if (SubstratePixelHeader.ClosureCount == 1)
	{
		const FSubstrateSubsurfaceHeader SSSHeader = SubstrateLoadSubsurfaceHeader(Substrate.MaterialTextureArray, Substrate.FirstSliceStoringSubstrateSSSData, SubstrateAddressing.PixelCoords);

		// We do not want the material data to be affected by any override.
		const bool bSkipSSSMaterialOverride = true;
		FSubstrateBSDF BSDF = UnpackSubstrateBSDFIn(Substrate.MaterialTextureArray, SubstrateAddressing, SubstratePixelHeader, bSkipSSSMaterialOverride);

		// Specular is derived from the largest F0 channel.
		const float BSDFSpecular = F0ToDielectricSpecular(max3(SLAB_F0(BSDF).x, SLAB_F0(BSDF).y, SLAB_F0(BSDF).z));

		// Two sided wrap SSS is only accepted when two sided foliage is enabled.
		const bool bFoliageAllowed = (bEnableTwoSidedFoliage != 0) || (BSDF_GETSSSTYPE(BSDF) != SSS_TYPE_TWO_SIDED_WRAP);

		bIsSSRPixel =
			   bWithinMaxRoughness
			&& BSDFSpecular >= MinSpecular
			&& !SubstratePixelHeader.IsHair()
			&& !SubstratePixelHeader.IsSingleLayerWater()
			&& bFoliageAllowed;

		if (BSDF_GETTYPE(BSDF) == SUBSTRATE_BSDF_TYPE_SLAB)
		{
			// Cull special case where specular=0, and roughness=0
			if (SLAB_ROUGHNESS(BSDF) == 0 && BSDFSpecular == 0)
			{
				bIsSSRPixel = false;
			}

			// Clearcoat: a slab whose haziness is flagged as simple clear coat is always accepted.
			const bool bHasHaziness = BSDF_GETHASHAZINESS(BSDF);
			if (bHasHaziness)
			{
				const FHaziness Haziness = UnpackHaziness(SLAB_HAZINESS(BSDF));
				bIsSSRPixel = bIsSSRPixel || Haziness.bSimpleClearCoat;
			}
		}
	}

	return bIsSSRPixel;

#else

	// Sample the GBuffer at the pixel center.
	float2 BufferUV = (PixelPos + 0.5f) * View.BufferSizeAndInvSize.zw;
	FGBufferData GBuffer = GetGBufferDataFromSceneTextures(BufferUV);

	const bool bWithinThresholds =
		   GBuffer.Roughness <= MaxRoughness
		&& GBuffer.Specular >= MinSpecular;

	const bool bSupportedShadingModel =
		   (GBuffer.ShadingModelID != SHADINGMODELID_UNLIT)
		&& (GBuffer.ShadingModelID != SHADINGMODELID_HAIR)
		&& (GBuffer.ShadingModelID != SHADINGMODELID_SINGLELAYERWATER);

	// Two sided foliage is ignored unless explicitly enabled.
	const bool bFoliageAllowed = (bEnableTwoSidedFoliage != 0) || (GBuffer.ShadingModelID != SHADINGMODELID_TWOSIDED_FOLIAGE);

	// Special case to cull: specular=0 and roughness=0.
	const bool bCulledSpecialCase = (GBuffer.Roughness == 0 && GBuffer.Specular == 0);

	// Clearcoat: if the material is clear coat it contains valid pixel, whatever the filters above say.
	const bool bIsClearCoat = (GBuffer.ShadingModelID == SHADINGMODELID_CLEAR_COAT);

	return (bWithinThresholds && bSupportedShadingModel && bFoliageAllowed && !bCulledSpecialCase) || bIsClearCoat;

#endif

#endif // USE_SSR_PRE_PASS_STENCIL
}
|
|
|
|
/**
 * Evaluates one pixel per thread with DoesPixelContainSSR() and sets 1 bit in TileMaskBufferOut
 * for any SSR_TILE_SIZE_XY x SSR_TILE_SIZE_XY tile (one thread group) which contains an SSR pixel.
 *
 * @param ThreadId		Dispatch thread id; offset by View.ViewRectMin to get the pixel to test.
 * @param GroupId		Thread group id, used as the tile coordinate in the mask.
 * @param GroupThreadId	Thread id inside the group.
 */
[numthreads(SSR_TILE_SIZE_XY, SSR_TILE_SIZE_XY, 1)]
void SSRTileCategorisationMarkCS(uint3 ThreadId : SV_DispatchThreadID, uint3 GroupId : SV_GroupID, uint3 GroupThreadId : SV_GroupThreadID)
{
#if 0
	// Slow reference path: the first thread of the group scans the whole tile and appends it to the tile list directly.
	// NOTE(review): disabled. As written it uses DrawIndirectDataUAV, DispatchIndirectDataUAV and SSRTileListDataUAV,
	// which are declared further down in this file, so those declarations must precede this function before enabling it.
	const int LinearIndex = GroupThreadId.y * SSR_TILE_SIZE_XY + GroupThreadId.x;
	if (LinearIndex < 1)
	{
		bool bContainsSSR = false;
		for (uint i = 0; i < SSR_TILE_SIZE_XY; ++i)
		{
			for (uint j = 0; j < SSR_TILE_SIZE_XY; ++j)
			{
				bContainsSSR = bContainsSSR || DoesPixelContainSSR((ThreadId.xy + uint2(i, j)) + View.ViewRectMin.xy);
			}
		}
		if (bContainsSSR)
		{
			uint WriteToIndex;
			InterlockedAdd(DrawIndirectDataUAV[1], 1, WriteToIndex);
			InterlockedAdd(DispatchIndirectDataUAV[0], 1);
			// Encoding needs to match Lumen reflection tile encoding (see LumenReflection.usf)
			SSRTileListDataUAV[WriteToIndex] = PackTileCoord12bits(GroupId.xy);
		}
	}

#else

	// Threads that fail the guard keep 'false' and still take part in the group wide merge below.
	// NOTE(review): the guard compares the view relative ThreadId against the buffer size while the lookup
	// adds View.ViewRectMin; confirm whether the view size was intended here.
	bool bContainsSSR = false;
	if (ThreadId.x < uint(View.BufferSizeAndInvSize.x) && ThreadId.y < uint(View.BufferSizeAndInvSize.y))
	{
		bContainsSSR = DoesPixelContainSSR(ThreadId.xy + View.ViewRectMin.xy);
	}

	// Only ever set to true by the first thread of the group.
	bool bWriteTile = false;

#if COMPILER_SUPPORTS_WAVE_VOTE

	if (all(GroupThreadId == 0))
	{
		bAnySSRPixels = 0;
	}
	GroupMemoryBarrierWithGroupSync();

	// One atomic per wave at most: the first lane reports on behalf of its whole wave.
	const bool bAnySSRPixelsInWave = WaveActiveAnyTrue(bContainsSSR);
	if (WaveIsFirstLane() && bAnySSRPixelsInWave)
	{
		InterlockedAdd(bAnySSRPixels, 1);
	}
	GroupMemoryBarrierWithGroupSync();

	if (all(GroupThreadId == 0))
	{
		bWriteTile = bAnySSRPixels > 0;
	}

#else

	const uint NumThreadsInGroup = SSR_TILE_SIZE_XY * SSR_TILE_SIZE_XY;
	const uint LinearIndex = GroupThreadId.y * SSR_TILE_SIZE_XY + GroupThreadId.x;

	ContainsSSR[LinearIndex] = bContainsSSR;
	GroupMemoryBarrierWithGroupSync();

	// Logical OR reduction in group shared memory, halving the number of active threads at every step
	// (strides 32, 16, 8, 4, 2 for an 8x8 group). Requires NumThreadsInGroup to be a power of two >= 2.
	// The smallest wave size is 16 on Intel hardware, so in theory the last steps could skip the group sync.
	// EDIT: for some reason group sync is needed until the end, otherwise some pixels are missing...
	UNROLL
	for (uint Stride = NumThreadsInGroup / 2; Stride > 1; Stride /= 2)
	{
		if (LinearIndex < Stride)
		{
			ContainsSSR[LinearIndex] = ContainsSSR[LinearIndex] || ContainsSSR[LinearIndex + Stride];
		}
		GroupMemoryBarrierWithGroupSync();
	}

	// Final merge of the two remaining entries, done by the first thread only.
	if (LinearIndex == 0)
	{
		bWriteTile = ContainsSSR[0] || ContainsSSR[1];
	}

#endif

	if (bWriteTile)
	{
		// Set bit to indicate tile is occupied.
		uint MaskLinearIndex = TiledViewRes.x * GroupId.y + GroupId.x;
		InterlockedOr(TileMaskBufferOut[MaskLinearIndex / 32U], 1U << (MaskLinearIndex % 32U));
	}
#endif
}
|
|
|
|
// Value written to DrawIndirectDataUAV[0] (VertexCountPerInstance) by SSRTileClassificationBuildListsCS.
uint VertexCountPerInstanceIndirect;
// Indirect draw arguments: element 1 (InstanceCount) accumulates the number of SSR tiles.
RWBuffer<uint> DrawIndirectDataUAV;
// Indirect dispatch arguments: element 0 accumulates the number of SSR tiles, elements 1 and 2 are set to 1.
RWBuffer<uint> DispatchIndirectDataUAV;
// Compacted list of SSR tiles, each entry packed with PackTileCoord12bits().
RWBuffer<uint> SSRTileListDataUAV;
// Tile mask read by SSRTileClassificationBuildListsCS: one bit per tile, 32 tiles per element.
StructuredBuffer<uint> TileMaskBuffer;

// Number of used tiles found by the current group.
groupshared uint SharedNumTiles;
// Packed coordinates of the used tiles found by the current group.
groupshared uint SharedTileData[SSR_TILE_SIZE_XY * SSR_TILE_SIZE_XY];
// First slot of SSRTileListDataUAV reserved for the current group.
groupshared uint SharedGlobalTileOffset;
|
|
|
|
/**
 * Every group checks SSR_TILE_SIZE_XY x SSR_TILE_SIZE_XY tiles of TileMaskBuffer and builds a spatially
 * coherent compacted list of SSR tiles in SSRTileListDataUAV. The indirect draw and dispatch arguments
 * are updated with the number of tiles appended.
 *
 * @param GroupId			Thread group id; selects the block of tiles inspected by this group.
 * @param DispatchThreadId	Dispatch thread id; thread (0,0) initialises the constant indirect arguments.
 * @param GroupThreadId		Thread id inside the group.
 */
[numthreads(SSR_TILE_SIZE_XY, SSR_TILE_SIZE_XY, 1)]
void SSRTileClassificationBuildListsCS(
	uint2 GroupId : SV_GroupID,
	uint2 DispatchThreadId : SV_DispatchThreadID,
	uint2 GroupThreadId : SV_GroupThreadID)
{
	const uint LinearThreadIndex = GroupThreadId.y * SSR_TILE_SIZE_XY + GroupThreadId.x;

	if (all(DispatchThreadId == 0))
	{
		// TODO compute clear myself
		DrawIndirectDataUAV[0] = VertexCountPerInstanceIndirect;	// VertexCountPerInstance
		//DrawIndirectDataUAV[1]									// InstanceCount already cleared to 0
		//DrawIndirectDataUAV[2] = 0;								// StartVertexLocation " "
		//DrawIndirectDataUAV[3] = 0;								// StartInstanceLocation " "

		DispatchIndirectDataUAV[1] = 1;
		DispatchIndirectDataUAV[2] = 1;
	}

	//@todo - parallel version
	// The first thread is the only writer of the shared list until the barrier below, so the tile count is
	// kept in a register and published once; no separate clear of SharedNumTiles (and no extra sync) is needed.
	if (LinearThreadIndex == 0)
	{
		uint NumTiles = 0;

		for (uint LocalTileIndex = 0; LocalTileIndex < SSR_TILE_SIZE_XY * SSR_TILE_SIZE_XY; ++LocalTileIndex)
		{
			// ZOrder tiles to maximize screen locality after converting to 1d for compaction
			// The tile locality ultimately affects trace coherency, since trace compaction pulls from neighboring tiles
			uint2 ThreadOffset = ZOrder2D(LocalTileIndex, log2(SSR_TILE_SIZE_XY));
			uint2 TileCoordinate = GroupId * SSR_TILE_SIZE_XY + ThreadOffset;

			// Groups on the border of the tile grid cover tiles that do not exist.
			if (all(TileCoordinate < TiledViewRes))
			{
				// One bit per tile, 32 tiles per mask element.
				uint MaskLinearIndex = TiledViewRes.x * TileCoordinate.y + TileCoordinate.x;
				uint Mask = 1u << (MaskLinearIndex % 32u);
				bool bTileUsed = (TileMaskBuffer[MaskLinearIndex / 32u] & Mask) != 0;
				if (bTileUsed)
				{
					// Encoding needs to match Lumen reflection tile encoding (see LumenReflection.usf)
					SharedTileData[NumTiles] = PackTileCoord12bits(TileCoordinate);
					++NumTiles;
				}
			}
		}

		SharedNumTiles = NumTiles;
	}

	GroupMemoryBarrierWithGroupSync();

	// Allocate space in the tile list
	if (LinearThreadIndex == 0 && SharedNumTiles > 0)
	{
		// The previous instance count is the first free slot of the global tile list.
		InterlockedAdd(DrawIndirectDataUAV[1], SharedNumTiles, SharedGlobalTileOffset);
		InterlockedAdd(DispatchIndirectDataUAV[0], SharedNumTiles);
	}

	GroupMemoryBarrierWithGroupSync();

	// Write out tiles, one per thread
	if (LinearThreadIndex < SharedNumTiles)
	{
		SSRTileListDataUAV[SharedGlobalTileOffset + LinearThreadIndex] = SharedTileData[LinearThreadIndex];
	}
}
|
|
|
|
#endif |