// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
/*=============================================================================
	SingleLayerWaterComposite - final step of the single layer water
=============================================================================*/
|
|
|
|
#include "Common.ush"
|
|
#include "DeferredShadingCommon.ush"
|
|
#include "BRDF.ush"
|
|
#include "ReflectionEnvironmentShared.ush"
|
|
#include "ShadingModels.ush"
|
|
#include "SceneTextureParameters.ush"
|
|
#include "LightGridCommon.ush"
|
|
#include "/Engine/Shared/SingleLayerWaterDefinitions.h"
|
|
#include "SingleLayerWaterCommon.ush"
|
|
|
|
#define ENABLE_SKY_LIGHT 1
|
|
|
|
#define REFLECTION_COMPOSITE_USE_BLENDED_REFLECTION_CAPTURES (FEATURE_LEVEL >= FEATURE_LEVEL_SM5)
|
|
#define REFLECTION_COMPOSITE_SUPPORT_SKYLIGHT_BLEND 0
|
|
#include "ReflectionEnvironmentComposite.ush"
|
|
#include "Substrate/SubstrateEvaluation.ush"
|
|
|
|
#if GENERATE_FROXELS
|
|
|
|
|
|
#define FROXEL_HASH_BUFFER_SIZE 64
|
|
#define FROXEL_HASH_THREAD_GROUP_SIZE 64
|
|
#define FROXEL_SHARED_HASH_BUFFER_VAR GroupShared_FroxelHashBuffer
|
|
|
|
groupshared uint GroupShared_FroxelHashBuffer[FROXEL_HASH_BUFFER_SIZE];
|
|
|
|
#include "Froxel/FroxelBuild.ush"
|
|
|
|
|
|
#endif
|
|
|
|
#ifndef THREADGROUP_SIZE
|
|
#define THREADGROUP_SIZE 1
|
|
#endif
|
|
|
|
// Screen space reflection result. Sampled at BufferUV by SingleLayerWaterComposite() and laid over the
// reflection capture / sky light result using its alpha channel.
Texture2D ScreenSpaceReflectionsTexture;
SamplerState ScreenSpaceReflectionsSampler;

// Depth texture and sampler handed to WaterSampleSceneDepthWithoutWater().
Texture2D SceneNoWaterDepthTexture;
SamplerState SceneNoWaterDepthSampler;

// Loaded per pixel and added to the output luminance when UseSeparatedMainDirLightTexture > 0.
Texture2D<float4> SeparatedMainDirLightTexture;

// xy = minimum UV, zw = maximum UV used to clamp BufferUV before the scene-without-water depth lookup.
float4 SceneNoWaterMinMaxUV;
// Forwarded unchanged to WaterSampleSceneDepthWithoutWater().
float2 SceneNoWaterTextureSize;
float2 SceneNoWaterInvTextureSize;
// Used as a switch: any value > 0 enables the SeparatedMainDirLightTexture read.
float UseSeparatedMainDirLightTexture;
|
|
|
|
// Per-pixel result of SingleLayerWaterComposite().
struct SingleLayerWaterCompositeOutput
{
	// rgb: reflection luminance plus the optional separated main directional light contribution.
	// a:   average of the RGB transmittance towards the scene.
	float4 LuminanceTransmittance;

	// Value handed to clip() by the pixel shader: 1 keeps the pixel, -1 discards it.
	float Clip;
};
|
|
|
|
/**
 * Computes what the single layer water composite pass outputs for one pixel: the reflection luminance
 * (reflection captures and sky light, overlaid with screen space reflections, weighted by the environment BRDF)
 * plus the optional separated main directional light, and the transmittance towards the scene.
 *
 * @param BufferUV			UV used to sample the scene textures and the SSR texture.
 * @param ScreenPosition	Screen position used to reconstruct the translated world position of the water surface.
 * @param SvPosition		Pixel position, used for texel loads and for the light grid cell lookup.
 * @return					LuminanceTransmittance and a Clip value. Clip is set to -1 when the pixel is not single layer
 *							water or when the opaque scene depth is not behind the water surface (DeltaDepth <= 0).
 */
SingleLayerWaterCompositeOutput SingleLayerWaterComposite(float2 BufferUV, float2 ScreenPosition, float4 SvPosition)
{
	const uint2 PixelPos = uint2(SvPosition.xy);

	// No AO or DFAO

	const float3 OneThird3 = float3(1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f);
	// Constant for now: the under water branches below are never taken.
	const bool CameraIsUnderWater = false;

	SingleLayerWaterCompositeOutput Output;
	Output.LuminanceTransmittance = float4(0.0f, 0.0f, 0.0f, 1.0f);
	Output.Clip = 1.0f; // no clipping

	float3 TransmittanceToScene = 1.0f;
	float3 SeparatedMainDirLightContribution = 0.0f;

	float4 EffectiveSceneNoWaterMinMaxUV = SceneNoWaterMinMaxUV;

#if SUBTRATE_GBUFFER_FORMAT==1

	// Water does not read the classification data, to avoid running classification a second time when water is enabled.
	FSubstrateAddressing SubstrateAddressing = GetSubstratePixelDataByteOffset(SvPosition.xy, uint2(View.BufferSizeAndInvSize.xy), Substrate.MaxBytesPerPixel);
	FSubstratePixelHeader SubstratePixelHeader = UnpackSubstrateHeaderIn(Substrate.MaterialTextureArray, SubstrateAddressing, Substrate.TopLayerTexture);
	if (SubstratePixelHeader.ClosureCount > 0 && SubstratePixelHeader.IsSingleLayerWater())
	{
		float DeviceZ = SampleDeviceZFromSceneTextures(BufferUV);
		float SceneDepth = ConvertFromDeviceZ(DeviceZ);

		const float2 SceneNoWaterUV = clamp(BufferUV, EffectiveSceneNoWaterMinMaxUV.xy, EffectiveSceneNoWaterMinMaxUV.zw);

		float OpaqueDeviceZ = WaterSampleSceneDepthWithoutWater(
			SceneNoWaterDepthTexture,
			SceneNoWaterDepthSampler,
			SceneNoWaterUV,
			SceneNoWaterTextureSize,
			SceneNoWaterInvTextureSize);
		float OpaqueDepth = ConvertFromDeviceZ(OpaqueDeviceZ);

		float WaterDepth = SceneDepth;

		float DeltaDepth = CameraIsUnderWater ? WaterDepth : OpaqueDepth - WaterDepth; // Inverted depth
		if (DeltaDepth > 0.0)
		{
			const FSubstrateBSDF SLWaterBSDF = UnpackSubstrateBSDFIn(Substrate.MaterialTextureArray, SubstrateAddressing, SubstratePixelHeader);
			const float3 WorldNormal = SubstrateGetBSDFSharedBasis(SubstratePixelHeader, SLWaterBSDF, SubstrateAddressing)[2];
			const float3 F0 = ComputeF0(SLW_SPECULAR(SLWaterBSDF), SLW_BASECOLOR(SLWaterBSDF), SLW_METALLIC(SLWaterBSDF));
			const float Roughness = SLW_ROUGHNESS(SLWaterBSDF);
			const float SafeRoughness = MakeRoughnessSafe(Roughness);

			if (UseSeparatedMainDirLightTexture > 0.0f)
			{
				// Also compose the separated main direction light luminance (separated to be able to combine it with DistanceFieldShadow)
				SeparatedMainDirLightContribution = SeparatedMainDirLightTexture[PixelPos].rgb;
			}

			// Compute the sky reflection contribution
			float3 Reflection = 0.0f;

			// TODO MAKE COMMON WITH ReflectionEnvironment?
			float3 TranslatedWorldPosition = mul(float4(GetScreenPositionForProjectionType(ScreenPosition, WaterDepth), WaterDepth, 1), View.ScreenToTranslatedWorld).xyz;
			float3 CameraToPixel = GetCameraVectorFromTranslatedWorldPosition(TranslatedWorldPosition);
			float IndirectIrradiance = 0.0;
			float IndirectSpecularOcclusion = 1.0f;
			float3 ExtraIndirectSpecular = 0;

			//support reflection captures
#if FEATURE_LEVEL >= FEATURE_LEVEL_SM5
			float2 LocalPosition = SvPosition.xy - View.ViewRectMin.xy;

			uint GridIndex = ComputeLightGridCellIndex(uint2(LocalPosition.x, LocalPosition.y), WaterDepth, 0);

			FCulledReflectionCapturesGridHeader CulledReflectionCapturesGridHeader = GetCulledReflectionCapturesGridHeader(GridIndex);
			uint NumCulledReflectionCaptures = CulledReflectionCapturesGridHeader.NumReflectionCaptures;
			uint CaptureDataStartIndex = CulledReflectionCapturesGridHeader.DataStartIndex;
#else
			uint NumCulledReflectionCaptures = 0;
			uint CaptureDataStartIndex = 0;
#endif

			// Mirror the view vector around the surface normal.
			float3 N = WorldNormal;
			float3 V = -CameraToPixel;
			float3 R = 2 * dot(V, N) * N - V;
			float NoV = saturate(dot(N, V));
			// Point lobe in off-specular peak direction
			R = GetOffSpecularPeakReflectionDir(N, R, SafeRoughness);
			const bool bCompositeSkylight = true;
			Reflection += View.PreExposure * CompositeReflectionCapturesAndSkylightTWS(
				1,
				TranslatedWorldPosition,
				R,
				SafeRoughness,
				IndirectIrradiance,
				IndirectSpecularOcclusion,
				ExtraIndirectSpecular,
				NumCulledReflectionCaptures,
				CaptureDataStartIndex,
				0,
				bCompositeSkylight);

			// Then combine reflection with SSR
			float4 SSR = Texture2DSample(ScreenSpaceReflectionsTexture, ScreenSpaceReflectionsSampler, BufferUV);
			Reflection = SSR.rgb + Reflection * (1 - SSR.a);

			const float3 BSDFCoverage = 1.0f; // Since the SLW BSDF is always isolated and composited onto the Substrate material buffer, this is assumed to be 1 here.

			// The specular over the water is completely in the hands of the user. We do not fade out metalness for instance.
			// NoV is already saturated above.
			float3 EnvBrdfValue = BSDFCoverage * EnvBRDF(F0, SafeRoughness, NoV);

#else // SUBTRATE_GBUFFER_FORMAT==1

	// Sample scene textures.
	FGBufferData GBuffer = GetGBufferDataFromSceneTextures(BufferUV);
	uint ShadingModelID = GBuffer.ShadingModelID;

	if (ShadingModelID == SHADINGMODELID_SINGLELAYERWATER)
	{
		const float2 SceneNoWaterUV = clamp(BufferUV, EffectiveSceneNoWaterMinMaxUV.xy, EffectiveSceneNoWaterMinMaxUV.zw);

		float OpaqueDeviceZ = WaterSampleSceneDepthWithoutWater(
			SceneNoWaterDepthTexture,
			SceneNoWaterDepthSampler,
			SceneNoWaterUV,
			SceneNoWaterTextureSize,
			SceneNoWaterInvTextureSize);
		float OpaqueDepth = ConvertFromDeviceZ(OpaqueDeviceZ);

		float WaterDepth = GBuffer.Depth;

		float DeltaDepth = CameraIsUnderWater ? WaterDepth : OpaqueDepth - WaterDepth; // Inverted depth
		if (DeltaDepth > 0.0)
		{
			// Compute the sky reflection contribution
			float3 Reflection = 0.0f;

			// TODO MAKE COMMON WITH ReflectionEnvironment?
			const float3 TranslatedWorldPosition = mul(float4(GetScreenPositionForProjectionType(ScreenPosition, GBuffer.Depth), GBuffer.Depth, 1), View.ScreenToTranslatedWorld).xyz;
			float IndirectIrradiance = 0.0;// GBuffer.IndirectIrradiance;
			float IndirectSpecularOcclusion = 1.0f;
			float3 ExtraIndirectSpecular = 0;

			//support reflection captures
#if FEATURE_LEVEL >= FEATURE_LEVEL_SM5
			float2 LocalPosition = SvPosition.xy - View.ViewRectMin.xy;

			uint GridIndex = ComputeLightGridCellIndex(uint2(LocalPosition.x, LocalPosition.y), WaterDepth, 0);

			FCulledReflectionCapturesGridHeader CulledReflectionCapturesGridHeader = GetCulledReflectionCapturesGridHeader(GridIndex);
			uint NumCulledReflectionCaptures = CulledReflectionCapturesGridHeader.NumReflectionCaptures;
			uint CaptureDataStartIndex = CulledReflectionCapturesGridHeader.DataStartIndex;
#else
			uint NumCulledReflectionCaptures = 0;
			uint CaptureDataStartIndex = 0;
#endif

			// Mirror the view vector around the surface normal.
			float3 N = GBuffer.WorldNormal;
			float3 V = -GetCameraVectorFromTranslatedWorldPosition(TranslatedWorldPosition);
			float3 R = 2 * dot(V, N) * N - V;
			float NoV = saturate(dot(N, V));
			// Point lobe in off-specular peak direction
			R = GetOffSpecularPeakReflectionDir(N, R, GBuffer.Roughness);
			const bool bCompositeSkylight = true;
			Reflection += View.PreExposure * CompositeReflectionCapturesAndSkylightTWS(
				1,
				TranslatedWorldPosition,
				R,
				GBuffer.Roughness,
				IndirectIrradiance,
				IndirectSpecularOcclusion,
				ExtraIndirectSpecular,
				NumCulledReflectionCaptures,
				CaptureDataStartIndex,
				0,
				bCompositeSkylight);

			// Then combine reflection with SSR
			float4 SSR = Texture2DSample(ScreenSpaceReflectionsTexture, ScreenSpaceReflectionsSampler, BufferUV);
			Reflection = SSR.rgb + Reflection * (1 - SSR.a);

			// Apply the BRDF reflection integral to reflection.
			// BRDF/Fresnel is already applied for the under water part in the scene.
			float3 EnvBrdfValue = EnvBRDF(GBuffer.SpecularColor, GBuffer.Roughness, NoV);

			if (UseSeparatedMainDirLightTexture > 0.0f)
			{
				// Also compose the separated main direction light luminance (separated to be able to combine it with DistanceFieldShadow)
				SeparatedMainDirLightContribution = SeparatedMainDirLightTexture[PixelPos].rgb;
			}

#endif // SUBTRATE_GBUFFER_FORMAT==1

			// From here on the code is shared by both paths; it runs inside the "DeltaDepth > 0" scope opened above.
			Reflection *= EnvBrdfValue;

			// Soft fading near shore to avoid seeing triangles.
			const float ShoreOpacity = saturate(DeltaDepth * 0.02f);
			Reflection.rgb *= ShoreOpacity;

			// Apply transmittance to reflection if under water
			if (CameraIsUnderWater)
			{
				TransmittanceToScene *= 1.0 - EnvBrdfValue;

				// Using default under water material: constant medium transmittance
				float3 WaterMediumTransmittance = float3(0.1, 0.1, 0.8);

				Reflection *= WaterMediumTransmittance;
				TransmittanceToScene *= WaterMediumTransmittance;
			}

			Output.LuminanceTransmittance = float4(Reflection + SeparatedMainDirLightContribution, dot(OneThird3, TransmittanceToScene));
		}
		else
		{
			// Opaque scene depth is not behind the water surface: discard.
			Output.Clip = -1.0f;
		}
	}
	else
	{
		// Not a single layer water pixel: discard.
		Output.Clip = -1.0f;
	}

	return Output;
}
|
|
|
|
|
|
|
|
#if TILE_CATERGORISATION_SHADER
|
|
|
|
#if USE_WATER_PRE_PASS_STENCIL
// Water pre-pass outputs: the stencil is tested by DoesPixelContainWater(), the depth is read when generating froxels.
Texture2D<uint2> WaterDepthStencilTexture;
Texture2D<float> WaterDepthTexture;
#endif

// Tile resolution. Its x component is the row pitch used to address the tile bit mask.
int2 TiledViewRes;
// Tile bit mask written by WaterTileCatergorisationMarkCS: one bit per tile, 32 tiles per uint.
RWStructuredBuffer<uint> TileMaskBufferOut;

#if COMPILER_SUPPORTS_WAVE_VOTE
// Incremented once per wave that saw at least one water pixel; non-zero means the tile contains water.
groupshared uint bAnyWaterPixels;
#else
// One flag per thread of the group, reduced in place by WaterTileCatergorisationMarkCS.
groupshared bool ContainsWater[SLW_TILE_SIZE_XY * SLW_TILE_SIZE_XY];
#endif
|
|
|
|
/**
 * Tells whether the pixel at PixelPos is single layer water.
 * The source of that information is picked at compile time: the water pre-pass stencil,
 * the Substrate pixel header, or the GBuffer shading model id.
 */
bool DoesPixelContainWater(uint2 PixelPos)
{
#if USE_WATER_PRE_PASS_STENCIL

	// Water is flagged in bit 0 of the stencil value.
	const uint StencilValue = WaterDepthStencilTexture.Load(uint3(PixelPos, 0)) STENCIL_COMPONENT_SWIZZLE;
	const bool bIsWater = (StencilValue & 1U) != 0U;

#elif SUBTRATE_GBUFFER_FORMAT==1

	const FSubstrateAddressing Addressing = GetSubstratePixelDataByteOffset(PixelPos, uint2(View.BufferSizeAndInvSize.xy), Substrate.MaxBytesPerPixel);
	const FSubstratePixelHeader Header = UnpackSubstrateHeaderIn(Substrate.MaterialTextureArray, Addressing, Substrate.TopLayerTexture);
	const bool bIsWater = (Header.ClosureCount > 0 && Header.IsSingleLayerWater());

#else

	// Sample the GBuffer at the pixel center.
	const float2 PixelCenterUV = (PixelPos + 0.5f) * View.BufferSizeAndInvSize.zw;
	const FGBufferData PixelGBuffer = GetGBufferDataFromSceneTextures(PixelCenterUV);
	const bool bIsWater = (PixelGBuffer.ShadingModelID == SHADINGMODELID_SINGLELAYERWATER);

#endif

	return bIsWater;
}
|
|
|
|
/**
 * Check all GBuffer pixels and set 1 bit for any 8x8 tile which contains a water pixel.
 * One thread group handles one tile and each thread tests a single pixel; the per-pixel results are
 * merged either with wave votes or with a shared memory reduction.
 */
[numthreads(SLW_TILE_SIZE_XY, SLW_TILE_SIZE_XY, 1)]
void WaterTileCatergorisationMarkCS(uint3 ThreadId : SV_DispatchThreadID, uint3 GroupId : SV_GroupID, uint3 GroupThreadId : SV_GroupThreadID)
{
	const uint LinearGroupThreadIndex = GroupThreadId.y * SLW_TILE_SIZE_XY + GroupThreadId.x;
#if 0
	// Slow reference path
	if (LinearGroupThreadIndex < 1u)
	{
		bool bContainsWater = false;
		for (uint i = 0; i < SLW_TILE_SIZE_XY; ++i)
		{
			for (uint j = 0; j < SLW_TILE_SIZE_XY; ++j)
			{
				bContainsWater = bContainsWater || DoesPixelContainWater((ThreadId.xy + uint2(i, j)) + View.ViewRectMin.xy);
			}
		}
		if (bContainsWater)
		{
			uint WriteToIndex;
			InterlockedAdd(DrawIndirectDataUAV[1], 1, WriteToIndex);
			InterlockedAdd(DispatchIndirectDataUAV[0], 1);
			// Encoding needs to match Lumen reflection tile encoding (see LumenReflection.usf)
			WaterTileListDataUAV[WriteToIndex] = PackTileCoord12bits(GroupId.xy);
		}
	}

#else

	// ThreadId is relative to the view rect; offset it to address the buffer.
	uint2 PixelCoord = ThreadId.xy + View.ViewRectMin.xy;
	bool bPixelHasWater = false;
	// NOTE(review): the bound is the buffer size although ThreadId is view relative - confirm this is intended.
	if(ThreadId.x < uint(View.BufferSizeAndInvSize.x) && ThreadId.y < uint(View.BufferSizeAndInvSize.y))
	{
		bPixelHasWater = DoesPixelContainWater(PixelCoord);
	}

	// Only set on the first thread of the group, which is the one writing the tile bit.
	bool bMarkTile = false;

#if COMPILER_SUPPORTS_WAVE_VOTE

	// Reset the group counter before any wave reports into it.
	if (LinearGroupThreadIndex == 0)
	{
		bAnyWaterPixels = 0;
	}
	GroupMemoryBarrierWithGroupSync();

	// One atomic per wave that contains at least one water pixel.
	const bool bWaveHasWater = WaveActiveAnyTrue(bPixelHasWater);
	if (WaveIsFirstLane() && bWaveHasWater)
	{
		InterlockedAdd(bAnyWaterPixels, 1);
	}
	GroupMemoryBarrierWithGroupSync();

	if (LinearGroupThreadIndex == 0)
	{
		bMarkTile = bAnyWaterPixels > 0;
	}

#else

	// Shared memory OR-reduction, halving the number of active threads at each step.
	ContainsWater[LinearGroupThreadIndex] = bPixelHasWater;
	GroupMemoryBarrierWithGroupSync();

	if (LinearGroupThreadIndex < 32) // 8*8 = 64 elements to merge
	{
		ContainsWater[LinearGroupThreadIndex] = ContainsWater[LinearGroupThreadIndex] || ContainsWater[LinearGroupThreadIndex + 32];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearGroupThreadIndex < 16)
	{
		ContainsWater[LinearGroupThreadIndex] = ContainsWater[LinearGroupThreadIndex] || ContainsWater[LinearGroupThreadIndex + 16];
	}
	GroupMemoryBarrierWithGroupSync();

	// The smallest wave size is 16 on Intel hardware. So now we can do simple math operations without group sync.
	// EDIT: for some reason group sync is needed until the end, otherwise some pixels are missing...

	if (LinearGroupThreadIndex < 8)
	{
		ContainsWater[LinearGroupThreadIndex] = ContainsWater[LinearGroupThreadIndex] || ContainsWater[LinearGroupThreadIndex + 8];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearGroupThreadIndex < 4)
	{
		ContainsWater[LinearGroupThreadIndex] = ContainsWater[LinearGroupThreadIndex] || ContainsWater[LinearGroupThreadIndex + 4];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearGroupThreadIndex < 2)
	{
		ContainsWater[LinearGroupThreadIndex] = ContainsWater[LinearGroupThreadIndex] || ContainsWater[LinearGroupThreadIndex + 2];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearGroupThreadIndex < 1)
	{
		// Last step: merge the two remaining entries directly into the result.
		bMarkTile = ContainsWater[LinearGroupThreadIndex] || ContainsWater[LinearGroupThreadIndex + 1];
	}

#endif

	if (bMarkTile)
	{
		// Set bit to indicate tile is occupied.
		const uint TileLinearIndex = TiledViewRes.x * GroupId.y + GroupId.x;
		InterlockedOr(TileMaskBufferOut[TileLinearIndex / 32U], 1U << (TileLinearIndex % 32U));
	}
#endif

#if GENERATE_FROXELS
	// Need to get value that's valid for the whole group
#if COMPILER_SUPPORTS_WAVE_VOTE
	if (bAnyWaterPixels != 0)
#endif
	{
		// Threads without a water pixel contribute a device Z of 0.
		float DeviceZ = 0.0f;
		BRANCH
		if (bPixelHasWater)
		{
			DeviceZ = WaterDepthTexture.Load(uint3(PixelCoord, 0)).x;
		}
		// One group per 8x8 tile
		const uint2 GroupTileOffset = GroupId.xy;
		// Only one 8x8 tile per group
		const uint LocalLinearTileId = 0;
		HashBuildFroxelsFromDeviceZ(DeviceZ, LinearGroupThreadIndex, LocalLinearTileId, GroupTileOffset);
	}
#endif

}
|
|
|
|
// Tile resolution of the mask in TileMaskBuffer (before any downsampling).
int2 FullTiledViewRes;
// Written to DrawIndirectDataUAV[0] by the build-lists pass.
uint VertexCountPerInstanceIndirect;
// Indirect draw arguments: [0] vertex count per instance, [1] instance count (number of water tiles).
RWBuffer<uint> DrawIndirectDataUAV;
// Indirect dispatch arguments for water tiles: [0] number of tiles, [1] and [2] set to 1.
RWBuffer<uint> DispatchIndirectDataUAV;
// Indirect dispatch arguments for clear tiles, same layout as DispatchIndirectDataUAV.
RWBuffer<uint> DispatchClearIndirectDataUAV;
// Compacted list of packed water tile coordinates.
RWBuffer<uint> WaterTileListDataUAV;
// Compacted list of packed coordinates of tiles without water.
RWBuffer<uint> ClearTileListDataUAV;
// Tile bit mask read by the build-lists pass: one bit per tile, 32 tiles per uint.
StructuredBuffer<uint> TileMaskBuffer;

// Number of water / clear tiles found by the current group.
groupshared uint SharedNumTiles;
groupshared uint SharedNumClearTiles;
// Water tiles are packed from the front, clear tiles from the back.
groupshared uint SharedTileData[THREADGROUP_SIZE * THREADGROUP_SIZE];
// Start of the range allocated for this group in the global water / clear tile lists.
groupshared uint SharedGlobalTileOffset;
groupshared uint SharedGlobalClearTileOffset;
|
|
|
|
#ifdef WaterTileClassificationBuildListsCS
|
|
|
|
/**
 * Every group checks THREADGROUP_SIZE * THREADGROUP_SIZE tiles and builds a spatially coherent compacted list of water tiles.
 * With OUTPUT_CLEAR_TILES a second list with the tiles that contain no water is built as well.
 *
 * The tile classification is read from TileMaskBuffer (one bit per full resolution tile). When the output tiles are
 * downsampled, a tile is used as soon as any of the full resolution tiles it covers has its bit set.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void WaterTileClassificationBuildListsCS(
	uint2 GroupId : SV_GroupID,
	uint2 DispatchThreadId : SV_DispatchThreadID,
	uint2 GroupThreadId : SV_GroupThreadID)
{
	const uint LinearThreadIndex = GroupThreadId.y * THREADGROUP_SIZE + GroupThreadId.x;

	// The very first thread of the dispatch fills in the constant parts of the indirect arguments.
	if (all(DispatchThreadId == 0))
	{
#if DOWNSAMPLE_FACTOR_X == 1 && DOWNSAMPLE_FACTOR_Y == 1
		// TODO compute clear myself
		DrawIndirectDataUAV[0] = VertexCountPerInstanceIndirect;		// VertexCountPerInstance
		//DrawIndirectDataUAV[1]										// InstanceCount			already cleared to 0
		//DrawIndirectDataUAV[2] = 0;									// StartVertexLocation		"  "
		//DrawIndirectDataUAV[3] = 0;									// StartInstanceLocation	"  "
#endif

		DispatchIndirectDataUAV[1] = 1;
		DispatchIndirectDataUAV[2] = 1;
#if OUTPUT_CLEAR_TILES
		DispatchClearIndirectDataUAV[1] = 1;
		DispatchClearIndirectDataUAV[2] = 1;
#endif
	}

	if (LinearThreadIndex == 0)
	{
		SharedNumTiles = 0;
#if OUTPUT_CLEAR_TILES
		SharedNumClearTiles = 0;
#endif
	}

	GroupMemoryBarrierWithGroupSync();

	//@todo - parallel version
	// The counters were reset above, so the first thread can accumulate into them directly.
	if (LinearThreadIndex == 0)
	{
		for (uint LocalTileIndex = 0; LocalTileIndex < THREADGROUP_SIZE * THREADGROUP_SIZE; ++LocalTileIndex)
		{
			// ZOrder tiles to maximize screen locality after converting to 1d for compaction
			// The tile locality ultimately affects trace coherency, since trace compaction pulls from neighboring tiles
			uint2 ThreadOffset = ZOrder2D(LocalTileIndex, log2(THREADGROUP_SIZE));
			uint2 TileCoordinate = GroupId * THREADGROUP_SIZE + ThreadOffset;

			if (all(TileCoordinate < TiledViewRes))
			{
				const uint2 FullTileCoord00 = TileCoordinate * uint2(DOWNSAMPLE_FACTOR_X, DOWNSAMPLE_FACTOR_Y);
				bool bTileUsed = false;

				// Test every covered full resolution tile with its own index and its own dword.
				// A combined "Mask | (Mask << 1)" test misses the horizontal neighbour whenever the first tile
				// sits on bit 31, because the neighbour's bit then lives in the next dword of the mask buffer.
				for (uint OffsetY = 0; OffsetY < DOWNSAMPLE_FACTOR_Y && !bTileUsed; ++OffsetY)
				{
					for (uint OffsetX = 0; OffsetX < DOWNSAMPLE_FACTOR_X && !bTileUsed; ++OffsetX)
					{
						const uint2 FullTileCoord = FullTileCoord00 + uint2(OffsetX, OffsetY);

						// Downsampled tiles on the right / bottom border may reach outside of the full resolution grid.
						if (all(FullTileCoord < uint2(FullTiledViewRes)))
						{
							const uint MaskLinearIndex = uint(FullTiledViewRes.x) * FullTileCoord.y + FullTileCoord.x;
							const uint MaskDword = TileMaskBuffer[MaskLinearIndex / 32u];
							bTileUsed = (MaskDword & (1u << (MaskLinearIndex % 32u))) != 0;
						}
					}
				}

				if (bTileUsed)
				{
					uint TileOffset = SharedNumTiles;
					// Encoding needs to match Lumen reflection tile encoding (see LumenReflection.usf)
					SharedTileData[TileOffset] = PackTileCoord12bits(TileCoordinate);
					SharedNumTiles = TileOffset + 1;
				}
				else
				{
#if OUTPUT_CLEAR_TILES
					// Pack clear tiles from the other end
					uint TileOffset = SharedNumClearTiles;
					SharedTileData[THREADGROUP_SIZE * THREADGROUP_SIZE - 1 - TileOffset] = PackTileCoord12bits(TileCoordinate);
					SharedNumClearTiles = TileOffset + 1;
#endif
				}
			}
		}
	}

	GroupMemoryBarrierWithGroupSync();

	// Allocate space in the tile list
	if (LinearThreadIndex == 0 && SharedNumTiles > 0)
	{
#if DOWNSAMPLE_FACTOR_X == 1 && DOWNSAMPLE_FACTOR_Y == 1
		InterlockedAdd(DrawIndirectDataUAV[1], SharedNumTiles);
#endif
		InterlockedAdd(DispatchIndirectDataUAV[0], SharedNumTiles, SharedGlobalTileOffset);
	}

#if OUTPUT_CLEAR_TILES
	if (LinearThreadIndex == 0 && SharedNumClearTiles > 0)
	{
		InterlockedAdd(DispatchClearIndirectDataUAV[0], SharedNumClearTiles, SharedGlobalClearTileOffset);
	}
#endif

	GroupMemoryBarrierWithGroupSync();

	// Write out tiles: the first SharedNumTiles threads copy water tiles, the following ones copy clear tiles.
	if (LinearThreadIndex < SharedNumTiles)
	{
		WaterTileListDataUAV[SharedGlobalTileOffset + LinearThreadIndex] = SharedTileData[LinearThreadIndex];
	}
	else
	{
#if OUTPUT_CLEAR_TILES
		uint LocalThreadIndex = LinearThreadIndex - SharedNumTiles;
		if (LocalThreadIndex < SharedNumClearTiles)
		{
			ClearTileListDataUAV[SharedGlobalClearTileOffset + LocalThreadIndex] = SharedTileData[THREADGROUP_SIZE * THREADGROUP_SIZE - 1 - LocalThreadIndex];
		}
#endif
	}
}
|
|
|
|
#endif //WaterTileClassificationBuildListsCS
|
|
|
|
#elif defined(TILE_VERTEX_SHADER)
|
|
|
|
|
|
// Packed tile coordinates, one entry per instance.
Buffer<uint> TileListData;

/**
 * Outputs one corner of the tile selected by InstanceId.
 * VertexId (0 to 5) selects which corner of the SLW_TILE_SIZE_XY sized tile is emitted.
 */
void WaterTileVS(
	in uint InstanceId : SV_InstanceID,
	in uint VertexId : SV_VertexID,
	out float4 Position : SV_POSITION)
{
	const uint2 TileCoord = UnpackTileCoord12bits(TileListData[InstanceId.x]);

	// Vertices 1, 2 and 4 sit on the +X edge of the tile, vertices 2, 4 and 5 on the +Y edge.
	const bool bOnMaxX = (VertexId == 1 || VertexId == 2 || VertexId == 4);
	const bool bOnMaxY = (VertexId == 2 || VertexId == 4 || VertexId == 5);

	uint2 TileVertex = TileCoord * SLW_TILE_SIZE_XY;
	if (bOnMaxX)
	{
		TileVertex.x += SLW_TILE_SIZE_XY;
	}
	if (bOnMaxY)
	{
		TileVertex.y += SLW_TILE_SIZE_XY;
	}

	// View port is set on the view rect. So no offset are needed.
	const float2 ViewUV = float2(TileVertex) * View.ViewSizeAndInvSize.zw;
	Position = float4(ViewUV * float2(2.0f, -2.0f) + float2(-1.0, 1.0f), 0.5f, 1.0f);
}
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
/**
 * Pixel shader entry point of the composite pass: evaluates SingleLayerWaterComposite() for the pixel,
 * discards it when the result asks for it and outputs the luminance / transmittance otherwise.
 */
void SingleLayerWaterCompositePS(
	in float4 SvPosition : SV_Position,
	out float4 OutColor : SV_Target0)
{
	ResolvedView = ResolveView();

	const float2 PixelScreenPosition = SvPositionToScreenPosition(SvPosition).xy;
	const float2 PixelBufferUV = SvPositionToBufferUV(SvPosition);

	// TODO use dual source blending to apply TransmittanceToScene
	const SingleLayerWaterCompositeOutput Composite = SingleLayerWaterComposite(PixelBufferUV, PixelScreenPosition, SvPosition);

	// Since this shader does not write to depth or stencil it should still benefit from EarlyZ (See AMD depth-in-depth documentation)
	clip(Composite.Clip);

	OutColor = Composite.LuminanceTransmittance;
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
// Scene color source read by WaterRefractionCopyPS, with its sampler.
Texture2D<float4> SceneColorCopyDownsampleTexture;
SamplerState SceneColorCopyDownsampleSampler;
// Scene depth source read by WaterRefractionCopyPS, with its sampler.
Texture2D<float4> SceneDepthCopyDownsampleTexture;
SamplerState SceneDepthCopyDownsampleSampler;
// Scale that converts SV_Position.xy into a UV of the source textures.
float2 SVPositionToSourceTextureUV;
|
|
|
|
/**
 * Copies scene depth and / or scene color into the textures used for water refraction.
 * COPY_DEPTH and COPY_COLOR select the outputs; when both are set depth goes to target 0 and color to target 1.
 */
void WaterRefractionCopyPS(
	in float4 SVPosition : SV_Position
#if COPY_DEPTH
	, out float OutSceneDepth : SV_Target0
#elif COPY_COLOR
	, out float4 OutSceneColor : SV_Target0
#endif
#if COPY_DEPTH && COPY_COLOR
	, out float4 OutSceneColor : SV_Target1
#endif
)
{
	const float2 SourceUV = SVPosition.xy * SVPositionToSourceTextureUV;

#if COPY_DEPTH
	#if DOWNSAMPLE_REFRACTION
	// Pick furthermost depth to minimize downsampling artifacts (UE4 is using reverse-z)
	const float4 GatheredDeviceZ = SceneDepthCopyDownsampleTexture.Gather(SceneDepthCopyDownsampleSampler, SourceUV);
	OutSceneDepth = min(min(min(GatheredDeviceZ.x, GatheredDeviceZ.y), GatheredDeviceZ.z), GatheredDeviceZ.w);
	#else
	OutSceneDepth = Texture2DSampleLevel(SceneDepthCopyDownsampleTexture, SceneDepthCopyDownsampleSampler, SourceUV, 0).x;
	#endif
#endif

#if COPY_COLOR
	#if DOWNSAMPLE_REFRACTION || !SUPPORTS_INDEPENDENT_SAMPLERS
	// Downsample and average all 2x2 quads
	const float3 CopiedColor = Texture2DSampleLevel(SceneColorCopyDownsampleTexture, SceneColorCopyDownsampleSampler, SourceUV, 0).xyz;
	#else
	// Use the depth point sampler when not downsampling color
	const float3 CopiedColor = Texture2DSampleLevel(SceneColorCopyDownsampleTexture, SceneDepthCopyDownsampleSampler, SourceUV, 0).xyz;
	#endif

	// Alpha is always written as 1.
	OutSceneColor = float4(CopiedColor, 1.0f);
#endif
}
|
|
|